import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.model_selection import train_test_split
from keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import seaborn as sns
import statsmodels.api as sm
mpl.rcParams['figure.figsize'] = (10, 8)
mpl.rcParams['axes.grid'] = False
df_power= pd.read_csv('power_actual.csv')
df_weather=pd.read_csv('weather_actuals.csv')
df_power.isnull().sum()
Unnamed: 0 0 datetime 0 ghi 35040 gti 35040 power 0 dtype: int64
df_power.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 70080 entries, 0 to 70079 Data columns (total 5 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Unnamed: 0 70080 non-null int64 1 datetime 70080 non-null object 2 ghi 35040 non-null float64 3 gti 35040 non-null float64 4 power 70080 non-null float64 dtypes: float64(3), int64(1), object(1) memory usage: 2.7+ MB
df_power.replace(np.nan,method='ffill',inplace=True)
Unnamed: 0 None datetime None ghi None gti None power None dtype: object
df_power.describe()
| Unnamed: 0 | ghi | gti | power | |
|---|---|---|---|---|
| count | 70080.000000 | 70080.0 | 70080.0 | 70080.000000 |
| mean | 35039.500000 | 0.0 | 0.0 | 28.612915 |
| std | 20230.497769 | 0.0 | 0.0 | 498.795887 |
| min | 0.000000 | 0.0 | 0.0 | 0.000000 |
| 25% | 17519.750000 | 0.0 | 0.0 | 0.000000 |
| 50% | 35039.500000 | 0.0 | 0.0 | 0.000000 |
| 75% | 52559.250000 | 0.0 | 0.0 | 5.540000 |
| max | 70079.000000 | 0.0 | 0.0 | 16172.040000 |
df_power['datetime']=pd.to_datetime(df_power['datetime'])
df_power.describe()
| Unnamed: 0 | ghi | gti | power | |
|---|---|---|---|---|
| count | 70080.000000 | 70080.0 | 70080.0 | 70080.000000 |
| mean | 35039.500000 | 0.0 | 0.0 | 28.612915 |
| std | 20230.497769 | 0.0 | 0.0 | 498.795887 |
| min | 0.000000 | 0.0 | 0.0 | 0.000000 |
| 25% | 17519.750000 | 0.0 | 0.0 | 0.000000 |
| 50% | 35039.500000 | 0.0 | 0.0 | 0.000000 |
| 75% | 52559.250000 | 0.0 | 0.0 | 5.540000 |
| max | 70079.000000 | 0.0 | 0.0 | 16172.040000 |
df_power.tail()
| Unnamed: 0 | datetime | ghi | gti | power | |
|---|---|---|---|---|---|
| 70075 | 70075 | 2019-09-30 22:45:00 | 0.0 | 0.0 | 0.0 |
| 70076 | 70076 | 2019-09-30 23:00:00 | 0.0 | 0.0 | 0.0 |
| 70077 | 70077 | 2019-09-30 23:15:00 | 0.0 | 0.0 | 0.0 |
| 70078 | 70078 | 2019-09-30 23:30:00 | 0.0 | 0.0 | 0.0 |
| 70079 | 70079 | 2019-09-30 23:45:00 | 0.0 | 0.0 | 0.0 |
df_power.max()
Unnamed: 0 70079 datetime 2019-09-30 23:45:00 ghi 0 gti 0 power 16172 dtype: object
fig=px.line(df_power,x='datetime',y='power',title='Energy generation')
fig.update_xaxes(
rangeslider_visible= True,
rangeselector=dict(
buttons = list([
dict(count = 1,label = '1m',step='month',stepmode = "backward"),
dict(count = 2,label = '6m',step='month',stepmode = "backward"),
dict(count = 3,label = '6mYTD',step='month',stepmode = "todate"),
dict(count = 4,label = '2y',step='year',stepmode = "backward"),
dict(count = 5,label = '3y',step='year',stepmode = "backward"),
dict(step= 'all')
])
)
)
fig.show()
df_power.set_index('datetime')
| Unnamed: 0 | ghi | gti | power | |
|---|---|---|---|---|
| datetime | ||||
| 2017-10-01 00:00:00 | 0 | 0.0 | 0.0 | 0.0 |
| 2017-10-01 00:15:00 | 1 | 0.0 | 0.0 | 0.0 |
| 2017-10-01 00:30:00 | 2 | 0.0 | 0.0 | 0.0 |
| 2017-10-01 00:45:00 | 3 | 0.0 | 0.0 | 0.0 |
| 2017-10-01 01:00:00 | 4 | 0.0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... |
| 2019-09-30 22:45:00 | 70075 | 0.0 | 0.0 | 0.0 |
| 2019-09-30 23:00:00 | 70076 | 0.0 | 0.0 | 0.0 |
| 2019-09-30 23:15:00 | 70077 | 0.0 | 0.0 | 0.0 |
| 2019-09-30 23:30:00 | 70078 | 0.0 | 0.0 | 0.0 |
| 2019-09-30 23:45:00 | 70079 | 0.0 | 0.0 | 0.0 |
70080 rows × 4 columns
df_power.plot(subplots=True)
array([<AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>,
<AxesSubplot:>], dtype=object)
df_power=df_power.drop(['ghi', 'gti','Unnamed: 0'], axis=1)
pd.date_range(start = '2017-10-01', end = '2019-09-30' ).difference(df_power.index)
DatetimeIndex(['2017-10-01', '2017-10-02', '2017-10-03', '2017-10-04',
'2017-10-05', '2017-10-06', '2017-10-07', '2017-10-08',
'2017-10-09', '2017-10-10',
...
'2019-09-21', '2019-09-22', '2019-09-23', '2019-09-24',
'2019-09-25', '2019-09-26', '2019-09-27', '2019-09-28',
'2019-09-29', '2019-09-30'],
dtype='datetime64[ns]', length=730, freq=None)
df_n=df_power.copy()
df = df_power.resample('1H', on='datetime').mean()
df.head(10)
| power | |
|---|---|
| datetime | |
| 2017-10-01 00:00:00 | 0.0000 |
| 2017-10-01 01:00:00 | 0.0000 |
| 2017-10-01 02:00:00 | 0.0000 |
| 2017-10-01 03:00:00 | 0.0000 |
| 2017-10-01 04:00:00 | 0.0000 |
| 2017-10-01 05:00:00 | 0.0000 |
| 2017-10-01 06:00:00 | 0.0200 |
| 2017-10-01 07:00:00 | 1.1925 |
| 2017-10-01 08:00:00 | 5.1375 |
| 2017-10-01 09:00:00 | 8.2250 |
df.max()
power 15091.685 dtype: float64
df.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 17520 entries, 2017-10-01 00:00:00 to 2019-09-30 23:00:00 Freq: H Data columns (total 1 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 power 17520 non-null float64 dtypes: float64(1) memory usage: 273.8 KB
df_weather.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 13619 entries, 0 to 13618 Data columns (total 31 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Unnamed: 0 13619 non-null int64 1 plant_id 13619 non-null int64 2 datetime_utc 13619 non-null object 3 datetime_local 13619 non-null object 4 cloud_cover 13619 non-null float64 5 apparent_temperature 13619 non-null float64 6 temperature 13619 non-null int64 7 humidity 13619 non-null int64 8 dew_point 13619 non-null float64 9 wind_bearing 13619 non-null int64 10 wind_speed 13619 non-null float64 11 wind_chill 3744 non-null float64 12 wind_gust 13619 non-null float64 13 heat_index 3744 non-null float64 14 pressure 13619 non-null float64 15 qpf 3744 non-null float64 16 uv_index 13619 non-null int64 17 snow 3744 non-null float64 18 pop 3744 non-null float64 19 fctcode 3744 non-null float64 20 ozone 13619 non-null float64 21 precip_accumulation 3744 non-null float64 22 precip_intensity 13619 non-null float64 23 precip_probability 13619 non-null float64 24 precip_type 6490 non-null object 25 visibility 13619 non-null float64 26 sunrise 13619 non-null object 27 sunset 13619 non-null object 28 icon 13619 non-null object 29 summary 13619 non-null object 30 updated_at 13619 non-null object dtypes: float64(17), int64(6), object(8) memory usage: 3.2+ MB
df_weather
| Unnamed: 0 | plant_id | datetime_utc | datetime_local | cloud_cover | apparent_temperature | temperature | humidity | dew_point | wind_bearing | ... | precip_accumulation | precip_intensity | precip_probability | precip_type | visibility | sunrise | sunset | icon | summary | updated_at | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 2017-09-30 18:30:00 | 2017-10-01 00:00:00 | 0.00 | 26.52 | 26 | 1 | 19.46 | 308 | ... | -9999.0 | -9999.0 | -9999.0 | rain | 3.57 | 2017-10-01 00:52:02 | 2017-10-01 12:45:07 | clear-night | Clear | 2019-07-16 16:37:16 |
| 1 | 1 | 1 | 2017-09-30 19:30:00 | 2017-10-01 01:00:00 | 0.00 | 28.78 | 27 | 1 | 20.06 | -9999 | ... | -9999.0 | -9999.0 | -9999.0 | rain | 3.06 | 2017-10-01 00:52:02 | 2017-10-01 12:45:07 | fog | Foggy | 2019-07-16 16:37:16 |
| 2 | 2 | 1 | 2017-09-30 20:30:00 | 2017-10-01 02:00:00 | 0.00 | 26.58 | 26 | 1 | 19.97 | -9999 | ... | -9999.0 | -9999.0 | -9999.0 | rain | 3.06 | 2017-10-01 00:52:02 | 2017-10-01 12:45:07 | fog | Foggy | 2019-07-16 16:37:16 |
| 3 | 3 | 1 | 2017-09-30 21:30:00 | 2017-10-01 03:00:00 | 0.00 | 23.84 | 23 | 1 | 18.66 | 270 | ... | -9999.0 | -9999.0 | -9999.0 | rain | 3.56 | 2017-10-01 00:52:02 | 2017-10-01 12:45:07 | clear-night | Clear | 2019-07-16 16:37:16 |
| 4 | 4 | 1 | 2017-09-30 22:30:00 | 2017-10-01 04:00:00 | 0.00 | 24.13 | 24 | 1 | 17.84 | -9999 | ... | -9999.0 | -9999.0 | -9999.0 | rain | 3.06 | 2017-10-01 00:52:02 | 2017-10-01 12:45:07 | fog | Foggy | 2019-07-16 16:37:16 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 13614 | 13614 | 1 | 2019-09-30 13:30:00 | 2019-09-30 19:00:00 | 0.16 | 31.66 | 29 | 1 | 21.19 | 40 | ... | NaN | 0.0 | 0.0 | NaN | 16.09 | 2019-09-30 00:51:16 | 2019-09-30 12:46:54 | partly-cloudy-night | Partly Cloudy | 2019-10-01 23:32:46 |
| 13615 | 13615 | 1 | 2019-09-30 14:30:00 | 2019-09-30 20:00:00 | 0.16 | 30.42 | 28 | 1 | 21.35 | 59 | ... | NaN | 0.0 | 0.0 | NaN | 16.09 | 2019-09-30 00:51:16 | 2019-09-30 12:46:54 | partly-cloudy-night | Partly Cloudy | 2019-10-01 23:32:46 |
| 13616 | 13616 | 1 | 2019-09-30 15:30:00 | 2019-09-30 21:00:00 | 0.18 | 29.56 | 27 | 1 | 21.61 | 76 | ... | NaN | 0.0 | 0.0 | NaN | 16.09 | 2019-09-30 00:51:16 | 2019-09-30 12:46:54 | partly-cloudy-night | Partly Cloudy | 2019-10-01 23:32:46 |
| 13617 | 13617 | 1 | 2019-09-30 16:30:00 | 2019-09-30 22:00:00 | 0.30 | 28.97 | 27 | 1 | 21.99 | 88 | ... | NaN | 0.0 | 0.0 | NaN | 16.09 | 2019-09-30 00:51:16 | 2019-09-30 12:46:54 | partly-cloudy-night | Partly Cloudy | 2019-10-01 23:32:46 |
| 13618 | 13618 | 1 | 2019-09-30 17:30:00 | 2019-09-30 23:00:00 | 0.35 | 28.15 | 26 | 1 | 22.09 | 95 | ... | NaN | 0.0 | 0.0 | NaN | 16.09 | 2019-09-30 00:51:16 | 2019-09-30 12:46:54 | partly-cloudy-night | Partly Cloudy | 2019-10-01 23:32:46 |
13619 rows × 31 columns
df_weather.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 13619 entries, 0 to 13618 Data columns (total 31 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Unnamed: 0 13619 non-null int64 1 plant_id 13619 non-null int64 2 datetime_utc 13619 non-null object 3 datetime_local 13619 non-null object 4 cloud_cover 13619 non-null float64 5 apparent_temperature 13619 non-null float64 6 temperature 13619 non-null int64 7 humidity 13619 non-null int64 8 dew_point 13619 non-null float64 9 wind_bearing 13619 non-null int64 10 wind_speed 13619 non-null float64 11 wind_chill 3744 non-null float64 12 wind_gust 13619 non-null float64 13 heat_index 3744 non-null float64 14 pressure 13619 non-null float64 15 qpf 3744 non-null float64 16 uv_index 13619 non-null int64 17 snow 3744 non-null float64 18 pop 3744 non-null float64 19 fctcode 3744 non-null float64 20 ozone 13619 non-null float64 21 precip_accumulation 3744 non-null float64 22 precip_intensity 13619 non-null float64 23 precip_probability 13619 non-null float64 24 precip_type 6490 non-null object 25 visibility 13619 non-null float64 26 sunrise 13619 non-null object 27 sunset 13619 non-null object 28 icon 13619 non-null object 29 summary 13619 non-null object 30 updated_at 13619 non-null object dtypes: float64(17), int64(6), object(8) memory usage: 3.2+ MB
df_weather.precip_type.value_counts()
rain 6147 -9999 343 Name: precip_type, dtype: int64
df_weather.precip_type.unique()
array(['rain', '-9999', nan], dtype=object)
df_weather.corr()
| Unnamed: 0 | plant_id | cloud_cover | apparent_temperature | temperature | humidity | dew_point | wind_bearing | wind_speed | wind_chill | ... | qpf | uv_index | snow | pop | fctcode | ozone | precip_accumulation | precip_intensity | precip_probability | visibility | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Unnamed: 0 | 1.000000 | NaN | 0.214831 | 0.506807 | 0.459176 | -0.082972 | 0.245485 | 0.365803 | 0.336288 | NaN | ... | NaN | 0.739429 | NaN | NaN | NaN | 0.739716 | NaN | 0.739380 | 0.739377 | 0.096399 |
| plant_id | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| cloud_cover | 0.214831 | NaN | 1.000000 | 0.166379 | 0.164960 | -0.071690 | 0.062932 | 0.070571 | 0.075671 | NaN | ... | NaN | 0.320889 | NaN | NaN | NaN | 0.320888 | NaN | 0.320889 | 0.320889 | -0.018911 |
| apparent_temperature | 0.506807 | NaN | 0.166379 | 1.000000 | 0.969744 | -0.181759 | 0.567757 | 0.220673 | 0.187573 | NaN | ... | NaN | 0.385864 | NaN | NaN | NaN | 0.386468 | NaN | 0.385658 | 0.385654 | 0.028750 |
| temperature | 0.459176 | NaN | 0.164960 | 0.969744 | 1.000000 | -0.324518 | 0.432929 | 0.223806 | 0.186522 | NaN | ... | NaN | 0.368620 | NaN | NaN | NaN | 0.369282 | NaN | 0.368406 | 0.368404 | 0.014180 |
| humidity | -0.082972 | NaN | -0.071690 | -0.181759 | -0.324518 | 1.000000 | 0.497798 | -0.219560 | -0.191230 | NaN | ... | NaN | -0.185654 | NaN | NaN | NaN | -0.185402 | NaN | -0.185555 | -0.185561 | 0.053018 |
| dew_point | 0.245485 | NaN | 0.062932 | 0.567757 | 0.432929 | 0.497798 | 1.000000 | -0.005577 | -0.005434 | NaN | ... | NaN | 0.085216 | NaN | NaN | NaN | 0.085929 | NaN | 0.085163 | 0.085154 | 0.046960 |
| wind_bearing | 0.365803 | NaN | 0.070571 | 0.220673 | 0.223806 | -0.219560 | -0.005577 | 1.000000 | 0.890365 | NaN | ... | NaN | 0.471057 | NaN | NaN | NaN | 0.471057 | NaN | 0.471054 | 0.471054 | -0.031508 |
| wind_speed | 0.336288 | NaN | 0.075671 | 0.187573 | 0.186522 | -0.191230 | -0.005434 | 0.890365 | 1.000000 | NaN | ... | NaN | 0.427106 | NaN | NaN | NaN | 0.427104 | NaN | 0.427106 | 0.427106 | -0.025176 |
| wind_chill | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| wind_gust | 0.739460 | NaN | 0.320889 | 0.385751 | 0.368493 | -0.185495 | 0.085277 | 0.471055 | 0.427106 | NaN | ... | NaN | 1.000000 | NaN | NaN | NaN | 0.999996 | NaN | 1.000000 | 1.000000 | -0.059133 |
| heat_index | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| pressure | 0.570359 | NaN | 0.401547 | 0.281708 | 0.264619 | -0.124952 | 0.069054 | 0.455247 | 0.476385 | NaN | ... | NaN | 0.772193 | NaN | NaN | NaN | 0.772187 | NaN | 0.772193 | 0.772193 | -0.045748 |
| qpf | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| uv_index | 0.739429 | NaN | 0.320889 | 0.385864 | 0.368620 | -0.185654 | 0.085216 | 0.471057 | 0.427106 | NaN | ... | NaN | 1.000000 | NaN | NaN | NaN | 0.999995 | NaN | 1.000000 | 1.000000 | -0.059144 |
| snow | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| pop | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| fctcode | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| ozone | 0.739716 | NaN | 0.320888 | 0.386468 | 0.369282 | -0.185402 | 0.085929 | 0.471057 | 0.427104 | NaN | ... | NaN | 0.999995 | NaN | NaN | NaN | 1.000000 | NaN | 0.999995 | 0.999995 | -0.059745 |
| precip_accumulation | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| precip_intensity | 0.739380 | NaN | 0.320889 | 0.385658 | 0.368406 | -0.185555 | 0.085163 | 0.471054 | 0.427106 | NaN | ... | NaN | 1.000000 | NaN | NaN | NaN | 0.999995 | NaN | 1.000000 | 1.000000 | -0.059145 |
| precip_probability | 0.739377 | NaN | 0.320889 | 0.385654 | 0.368404 | -0.185561 | 0.085154 | 0.471054 | 0.427106 | NaN | ... | NaN | 1.000000 | NaN | NaN | NaN | 0.999995 | NaN | 1.000000 | 1.000000 | -0.059146 |
| visibility | 0.096399 | NaN | -0.018911 | 0.028750 | 0.014180 | 0.053018 | 0.046960 | -0.031508 | -0.025176 | NaN | ... | NaN | -0.059144 | NaN | NaN | NaN | -0.059745 | NaN | -0.059145 | -0.059146 | 1.000000 |
23 rows × 23 columns
Used Frequency encoding for categorical features
data_obj=df_weather.select_dtypes(include=[object])
data_nonobj=df_weather.select_dtypes(exclude=[object])
data_obj.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 13619 entries, 0 to 13618 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 datetime_utc 13619 non-null object 1 datetime_local 13619 non-null object 2 precip_type 6490 non-null object 3 sunrise 13619 non-null object 4 sunset 13619 non-null object 5 icon 13619 non-null object 6 summary 13619 non-null object 7 updated_at 13619 non-null object dtypes: object(8) memory usage: 851.3+ KB
data_nonobj.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 13619 entries, 0 to 13618 Data columns (total 23 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Unnamed: 0 13619 non-null int64 1 plant_id 13619 non-null int64 2 cloud_cover 13619 non-null float64 3 apparent_temperature 13619 non-null float64 4 temperature 13619 non-null int64 5 humidity 13619 non-null int64 6 dew_point 13619 non-null float64 7 wind_bearing 13619 non-null int64 8 wind_speed 13619 non-null float64 9 wind_chill 3744 non-null float64 10 wind_gust 13619 non-null float64 11 heat_index 3744 non-null float64 12 pressure 13619 non-null float64 13 qpf 3744 non-null float64 14 uv_index 13619 non-null int64 15 snow 3744 non-null float64 16 pop 3744 non-null float64 17 fctcode 3744 non-null float64 18 ozone 13619 non-null float64 19 precip_accumulation 3744 non-null float64 20 precip_intensity 13619 non-null float64 21 precip_probability 13619 non-null float64 22 visibility 13619 non-null float64 dtypes: float64(17), int64(6) memory usage: 2.4 MB
df_weather=df_weather.drop(['datetime_utc','plant_id','Unnamed: 0','updated_at'], axis=1)
df_weather.head()
| datetime_local | cloud_cover | apparent_temperature | temperature | humidity | dew_point | wind_bearing | wind_speed | wind_chill | wind_gust | ... | ozone | precip_accumulation | precip_intensity | precip_probability | precip_type | visibility | sunrise | sunset | icon | summary | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2017-10-01 00:00:00 | 0.0 | 26.52 | 26 | 1 | 19.46 | 308 | 0.83 | -9999.0 | -9999.0 | ... | -9999.0 | -9999.0 | -9999.0 | -9999.0 | rain | 3.57 | 2017-10-01 00:52:02 | 2017-10-01 12:45:07 | clear-night | Clear |
| 1 | 2017-10-01 01:00:00 | 0.0 | 28.78 | 27 | 1 | 20.06 | -9999 | -9999.00 | -9999.0 | -9999.0 | ... | -9999.0 | -9999.0 | -9999.0 | -9999.0 | rain | 3.06 | 2017-10-01 00:52:02 | 2017-10-01 12:45:07 | fog | Foggy |
| 2 | 2017-10-01 02:00:00 | 0.0 | 26.58 | 26 | 1 | 19.97 | -9999 | -9999.00 | -9999.0 | -9999.0 | ... | -9999.0 | -9999.0 | -9999.0 | -9999.0 | rain | 3.06 | 2017-10-01 00:52:02 | 2017-10-01 12:45:07 | fog | Foggy |
| 3 | 2017-10-01 03:00:00 | 0.0 | 23.84 | 23 | 1 | 18.66 | 270 | 0.45 | -9999.0 | -9999.0 | ... | -9999.0 | -9999.0 | -9999.0 | -9999.0 | rain | 3.56 | 2017-10-01 00:52:02 | 2017-10-01 12:45:07 | clear-night | Clear |
| 4 | 2017-10-01 04:00:00 | 0.0 | 24.13 | 24 | 1 | 17.84 | -9999 | -9999.00 | -9999.0 | -9999.0 | ... | -9999.0 | -9999.0 | -9999.0 | -9999.0 | rain | 3.06 | 2017-10-01 00:52:02 | 2017-10-01 12:45:07 | fog | Foggy |
5 rows × 27 columns
df_weather['datetime_local']=pd.to_datetime(df_weather['datetime_local'])
df_weather['sunrise']=pd.to_datetime(df_weather['sunrise'])
df_weather['sunset']=pd.to_datetime(df_weather['sunset'])
df_weather.set_index('datetime_local',inplace=True)
df_weather.describe()
| cloud_cover | apparent_temperature | temperature | humidity | dew_point | wind_bearing | wind_speed | wind_chill | wind_gust | heat_index | ... | qpf | uv_index | snow | pop | fctcode | ozone | precip_accumulation | precip_intensity | precip_probability | visibility | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 13619.000000 | 13619.000000 | 13619.000000 | 13619.000000 | 13619.000000 | 13619.000000 | 13619.000000 | 3744.0 | 13619.000000 | 3744.0 | ... | 3744.0 | 13619.000000 | 3744.0 | 3744.0 | 3744.0 | 13619.000000 | 3744.0 | 13619.000000 | 13619.000000 | 13619.000000 |
| mean | -314.005310 | 25.944639 | 24.498054 | 0.537411 | 11.924626 | -498.677363 | -540.808704 | -9999.0 | -2393.184251 | -9999.0 | ... | -9999.0 | -2394.989280 | -9999.0 | -9999.0 | -9999.0 | -2187.730797 | -9999.0 | -2396.367775 | -2396.398081 | -117.086149 |
| std | 1744.610028 | 10.469808 | 8.840016 | 0.498617 | 9.129459 | 2557.943872 | 2267.247300 | 0.0 | 4270.333680 | 0.0 | ... | 0.0 | 4269.320576 | 0.0 | 0.0 | 0.0 | 4385.706548 | 0.0 | 4268.545854 | 4268.528833 | 1130.763316 |
| min | -9999.000000 | 3.140000 | 5.000000 | 0.000000 | -20.560000 | -9999.000000 | -9999.000000 | -9999.0 | -9999.000000 | -9999.0 | ... | -9999.0 | -9999.000000 | -9999.0 | -9999.0 | -9999.0 | -9999.000000 | -9999.0 | -9999.000000 | -9999.000000 | -9999.000000 |
| 25% | 0.000000 | 17.310000 | 17.000000 | 0.000000 | 5.920000 | 79.000000 | 1.620000 | -9999.0 | 1.185000 | -9999.0 | ... | -9999.0 | 0.000000 | -9999.0 | -9999.0 | -9999.0 | 236.900000 | -9999.0 | 0.000000 | 0.000000 | 3.540000 |
| 50% | 0.000000 | 25.540000 | 25.000000 | 1.000000 | 10.710000 | 149.000000 | 2.410000 | -9999.0 | 3.070000 | -9999.0 | ... | -9999.0 | 0.000000 | -9999.0 | -9999.0 | -9999.0 | 270.800000 | -9999.0 | 0.000000 | 0.000000 | 16.090000 |
| 75% | 0.410000 | 34.580000 | 31.000000 | 1.000000 | 19.670000 | 293.000000 | 3.330000 | -9999.0 | 4.710000 | -9999.0 | ... | -9999.0 | 2.000000 | -9999.0 | -9999.0 | -9999.0 | 280.800000 | -9999.0 | 0.000000 | 0.000000 | 16.090000 |
| max | 1.000000 | 49.750000 | 47.000000 | 1.000000 | 28.350000 | 359.000000 | 9.730000 | -9999.0 | 15.150000 | -9999.0 | ... | -9999.0 | 12.000000 | -9999.0 | -9999.0 | -9999.0 | 351.700000 | -9999.0 | 6.220000 | 0.730000 | 16.090000 |
8 rows × 21 columns
fe = df_weather.groupby('icon').size()/len(df_weather)
df_weather.loc[:,'icon']=df_weather['icon'].map(fe)
fe
icon clear-day 0.220060 clear-night 0.225788 cloudy 0.095822 fog 0.214406 partly-cloudy-day 0.129819 partly-cloudy-night 0.107056 rain 0.006902 wind 0.000147 dtype: float64
fe.plot()
<AxesSubplot:xlabel='icon'>
fe_s = df_weather.groupby('summary').size()/len(df_weather)
df_weather.loc[:,'summary']=df_weather['summary'].map(fe_s)
fe_s
summary Breezy and Foggy 0.000147 Clear 0.408767 Foggy 0.214259 Humid 0.037081 Humid and Foggy 0.000147 Humid and Mostly Cloudy 0.060651 Humid and Overcast 0.052427 Humid and Partly Cloudy 0.042808 Light Rain 0.000073 Light Rain and Humid 0.000294 Mostly Cloudy 0.046846 Overcast 0.043395 Partly Cloudy 0.086570 Possible Drizzle 0.000073 Possible Drizzle and Humid 0.000073 Possible Light Rain 0.001469 Possible Light Rain and Humid 0.003818 Rain 0.000147 Rain and Humid 0.000955 dtype: float64
fe_s.plot()
<AxesSubplot:xlabel='summary'>
fe_p = df_weather.groupby('precip_type').size()/len(df_weather)
df_weather.loc[:,'precip_type']=df_weather['precip_type'].map(fe_p)
fe_p
precip_type -9999 0.025185 rain 0.451355 dtype: float64
df_weather.head()
| cloud_cover | apparent_temperature | temperature | humidity | dew_point | wind_bearing | wind_speed | wind_chill | wind_gust | heat_index | ... | ozone | precip_accumulation | precip_intensity | precip_probability | precip_type | visibility | sunrise | sunset | icon | summary | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| datetime_local | |||||||||||||||||||||
| 2017-10-01 00:00:00 | 0.0 | 26.52 | 26 | 1 | 19.46 | 308 | 0.83 | -9999.0 | -9999.0 | -9999.0 | ... | -9999.0 | -9999.0 | -9999.0 | -9999.0 | 0.451355 | 3.57 | 2017-10-01 00:52:02 | 2017-10-01 12:45:07 | 0.225788 | 0.408767 |
| 2017-10-01 01:00:00 | 0.0 | 28.78 | 27 | 1 | 20.06 | -9999 | -9999.00 | -9999.0 | -9999.0 | -9999.0 | ... | -9999.0 | -9999.0 | -9999.0 | -9999.0 | 0.451355 | 3.06 | 2017-10-01 00:52:02 | 2017-10-01 12:45:07 | 0.214406 | 0.214259 |
| 2017-10-01 02:00:00 | 0.0 | 26.58 | 26 | 1 | 19.97 | -9999 | -9999.00 | -9999.0 | -9999.0 | -9999.0 | ... | -9999.0 | -9999.0 | -9999.0 | -9999.0 | 0.451355 | 3.06 | 2017-10-01 00:52:02 | 2017-10-01 12:45:07 | 0.214406 | 0.214259 |
| 2017-10-01 03:00:00 | 0.0 | 23.84 | 23 | 1 | 18.66 | 270 | 0.45 | -9999.0 | -9999.0 | -9999.0 | ... | -9999.0 | -9999.0 | -9999.0 | -9999.0 | 0.451355 | 3.56 | 2017-10-01 00:52:02 | 2017-10-01 12:45:07 | 0.225788 | 0.408767 |
| 2017-10-01 04:00:00 | 0.0 | 24.13 | 24 | 1 | 17.84 | -9999 | -9999.00 | -9999.0 | -9999.0 | -9999.0 | ... | -9999.0 | -9999.0 | -9999.0 | -9999.0 | 0.451355 | 3.06 | 2017-10-01 00:52:02 | 2017-10-01 12:45:07 | 0.214406 | 0.214259 |
5 rows × 26 columns
df_weather.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 13619 entries, 2017-10-01 00:00:00 to 2019-09-30 23:00:00 Data columns (total 26 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 cloud_cover 13619 non-null float64 1 apparent_temperature 13619 non-null float64 2 temperature 13619 non-null int64 3 humidity 13619 non-null int64 4 dew_point 13619 non-null float64 5 wind_bearing 13619 non-null int64 6 wind_speed 13619 non-null float64 7 wind_chill 3744 non-null float64 8 wind_gust 13619 non-null float64 9 heat_index 3744 non-null float64 10 pressure 13619 non-null float64 11 qpf 3744 non-null float64 12 uv_index 13619 non-null int64 13 snow 3744 non-null float64 14 pop 3744 non-null float64 15 fctcode 3744 non-null float64 16 ozone 13619 non-null float64 17 precip_accumulation 3744 non-null float64 18 precip_intensity 13619 non-null float64 19 precip_probability 13619 non-null float64 20 precip_type 6490 non-null float64 21 visibility 13619 non-null float64 22 sunrise 13619 non-null datetime64[ns] 23 sunset 13619 non-null datetime64[ns] 24 icon 13619 non-null float64 25 summary 13619 non-null float64 dtypes: datetime64[ns](2), float64(20), int64(4) memory usage: 2.8 MB
df_weather.isnull().sum()
cloud_cover 0 apparent_temperature 0 temperature 0 humidity 0 dew_point 0 wind_bearing 0 wind_speed 0 wind_chill 9875 wind_gust 0 heat_index 9875 pressure 0 qpf 9875 uv_index 0 snow 9875 pop 9875 fctcode 9875 ozone 0 precip_accumulation 9875 precip_intensity 0 precip_probability 0 precip_type 7129 visibility 0 sunrise 0 sunset 0 icon 0 summary 0 dtype: int64
df_weather['precip_type'].value_counts()
0.451355 6147 0.025185 343 Name: precip_type, dtype: int64
df_weather.describe()
| cloud_cover | apparent_temperature | temperature | humidity | dew_point | wind_bearing | wind_speed | wind_chill | wind_gust | heat_index | ... | pop | fctcode | ozone | precip_accumulation | precip_intensity | precip_probability | precip_type | visibility | icon | summary | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 13619.000000 | 13619.000000 | 13619.000000 | 13619.000000 | 13619.000000 | 13619.000000 | 13619.000000 | 3744.0 | 13619.000000 | 3744.0 | ... | 3744.0 | 3744.0 | 13619.000000 | 3744.0 | 13619.000000 | 13619.000000 | 6490.000000 | 13619.000000 | 13619.000000 | 13619.000000 |
| mean | -314.005310 | 25.944639 | 24.498054 | 0.537411 | 11.924626 | -498.677363 | -540.808704 | -9999.0 | -2393.184251 | -9999.0 | ... | -9999.0 | -9999.0 | -2187.730797 | -9999.0 | -2396.367775 | -2396.398081 | 0.428831 | -117.086149 | 0.182920 | 0.234222 |
| std | 1744.610028 | 10.469808 | 8.840016 | 0.498617 | 9.129459 | 2557.943872 | 2267.247300 | 0.0 | 4270.333680 | 0.0 | ... | 0.0 | 0.0 | 4385.706548 | 0.0 | 4268.545854 | 4268.528833 | 0.095356 | 1130.763316 | 0.053470 | 0.156872 |
| min | -9999.000000 | 3.140000 | 5.000000 | 0.000000 | -20.560000 | -9999.000000 | -9999.000000 | -9999.0 | -9999.000000 | -9999.0 | ... | -9999.0 | -9999.0 | -9999.000000 | -9999.0 | -9999.000000 | -9999.000000 | 0.025185 | -9999.000000 | 0.000147 | 0.000073 |
| 25% | 0.000000 | 17.310000 | 17.000000 | 0.000000 | 5.920000 | 79.000000 | 1.620000 | -9999.0 | 1.185000 | -9999.0 | ... | -9999.0 | -9999.0 | 236.900000 | -9999.0 | 0.000000 | 0.000000 | 0.451355 | 3.540000 | 0.129819 | 0.060651 |
| 50% | 0.000000 | 25.540000 | 25.000000 | 1.000000 | 10.710000 | 149.000000 | 2.410000 | -9999.0 | 3.070000 | -9999.0 | ... | -9999.0 | -9999.0 | 270.800000 | -9999.0 | 0.000000 | 0.000000 | 0.451355 | 16.090000 | 0.214406 | 0.214259 |
| 75% | 0.410000 | 34.580000 | 31.000000 | 1.000000 | 19.670000 | 293.000000 | 3.330000 | -9999.0 | 4.710000 | -9999.0 | ... | -9999.0 | -9999.0 | 280.800000 | -9999.0 | 0.000000 | 0.000000 | 0.451355 | 16.090000 | 0.220060 | 0.408767 |
| max | 1.000000 | 49.750000 | 47.000000 | 1.000000 | 28.350000 | 359.000000 | 9.730000 | -9999.0 | 15.150000 | -9999.0 | ... | -9999.0 | -9999.0 | 351.700000 | -9999.0 | 6.220000 | 0.730000 | 0.451355 | 16.090000 | 0.225788 | 0.408767 |
8 rows × 24 columns
df_weather=df_weather.drop(['sunrise','sunset'],axis=1)
df_weather.head()
| cloud_cover | apparent_temperature | temperature | humidity | dew_point | wind_bearing | wind_speed | wind_chill | wind_gust | heat_index | ... | pop | fctcode | ozone | precip_accumulation | precip_intensity | precip_probability | precip_type | visibility | icon | summary | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| datetime_local | |||||||||||||||||||||
| 2017-10-01 00:00:00 | 0.0 | 26.52 | 26 | 1 | 19.46 | 308 | 0.83 | -9999.0 | -9999.0 | -9999.0 | ... | -9999.0 | -9999.0 | -9999.0 | -9999.0 | -9999.0 | -9999.0 | 0.451355 | 3.57 | 0.225788 | 0.408767 |
| 2017-10-01 01:00:00 | 0.0 | 28.78 | 27 | 1 | 20.06 | -9999 | -9999.00 | -9999.0 | -9999.0 | -9999.0 | ... | -9999.0 | -9999.0 | -9999.0 | -9999.0 | -9999.0 | -9999.0 | 0.451355 | 3.06 | 0.214406 | 0.214259 |
| 2017-10-01 02:00:00 | 0.0 | 26.58 | 26 | 1 | 19.97 | -9999 | -9999.00 | -9999.0 | -9999.0 | -9999.0 | ... | -9999.0 | -9999.0 | -9999.0 | -9999.0 | -9999.0 | -9999.0 | 0.451355 | 3.06 | 0.214406 | 0.214259 |
| 2017-10-01 03:00:00 | 0.0 | 23.84 | 23 | 1 | 18.66 | 270 | 0.45 | -9999.0 | -9999.0 | -9999.0 | ... | -9999.0 | -9999.0 | -9999.0 | -9999.0 | -9999.0 | -9999.0 | 0.451355 | 3.56 | 0.225788 | 0.408767 |
| 2017-10-01 04:00:00 | 0.0 | 24.13 | 24 | 1 | 17.84 | -9999 | -9999.00 | -9999.0 | -9999.0 | -9999.0 | ... | -9999.0 | -9999.0 | -9999.0 | -9999.0 | -9999.0 | -9999.0 | 0.451355 | 3.06 | 0.214406 | 0.214259 |
5 rows × 24 columns
df_weather.describe(include='all')
| cloud_cover | apparent_temperature | temperature | humidity | dew_point | wind_bearing | wind_speed | wind_chill | wind_gust | heat_index | ... | pop | fctcode | ozone | precip_accumulation | precip_intensity | precip_probability | precip_type | visibility | icon | summary | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 13619.000000 | 13619.000000 | 13619.000000 | 13619.000000 | 13619.000000 | 13619.000000 | 13619.000000 | 3744.0 | 13619.000000 | 3744.0 | ... | 3744.0 | 3744.0 | 13619.000000 | 3744.0 | 13619.000000 | 13619.000000 | 6490.000000 | 13619.000000 | 13619.000000 | 13619.000000 |
| mean | -314.005310 | 25.944639 | 24.498054 | 0.537411 | 11.924626 | -498.677363 | -540.808704 | -9999.0 | -2393.184251 | -9999.0 | ... | -9999.0 | -9999.0 | -2187.730797 | -9999.0 | -2396.367775 | -2396.398081 | 0.428831 | -117.086149 | 0.182920 | 0.234222 |
| std | 1744.610028 | 10.469808 | 8.840016 | 0.498617 | 9.129459 | 2557.943872 | 2267.247300 | 0.0 | 4270.333680 | 0.0 | ... | 0.0 | 0.0 | 4385.706548 | 0.0 | 4268.545854 | 4268.528833 | 0.095356 | 1130.763316 | 0.053470 | 0.156872 |
| min | -9999.000000 | 3.140000 | 5.000000 | 0.000000 | -20.560000 | -9999.000000 | -9999.000000 | -9999.0 | -9999.000000 | -9999.0 | ... | -9999.0 | -9999.0 | -9999.000000 | -9999.0 | -9999.000000 | -9999.000000 | 0.025185 | -9999.000000 | 0.000147 | 0.000073 |
| 25% | 0.000000 | 17.310000 | 17.000000 | 0.000000 | 5.920000 | 79.000000 | 1.620000 | -9999.0 | 1.185000 | -9999.0 | ... | -9999.0 | -9999.0 | 236.900000 | -9999.0 | 0.000000 | 0.000000 | 0.451355 | 3.540000 | 0.129819 | 0.060651 |
| 50% | 0.000000 | 25.540000 | 25.000000 | 1.000000 | 10.710000 | 149.000000 | 2.410000 | -9999.0 | 3.070000 | -9999.0 | ... | -9999.0 | -9999.0 | 270.800000 | -9999.0 | 0.000000 | 0.000000 | 0.451355 | 16.090000 | 0.214406 | 0.214259 |
| 75% | 0.410000 | 34.580000 | 31.000000 | 1.000000 | 19.670000 | 293.000000 | 3.330000 | -9999.0 | 4.710000 | -9999.0 | ... | -9999.0 | -9999.0 | 280.800000 | -9999.0 | 0.000000 | 0.000000 | 0.451355 | 16.090000 | 0.220060 | 0.408767 |
| max | 1.000000 | 49.750000 | 47.000000 | 1.000000 | 28.350000 | 359.000000 | 9.730000 | -9999.0 | 15.150000 | -9999.0 | ... | -9999.0 | -9999.0 | 351.700000 | -9999.0 | 6.220000 | 0.730000 | 0.451355 | 16.090000 | 0.225788 | 0.408767 |
8 rows × 24 columns
df_weather.replace(-9999, np.nan,inplace=True)
df_weather.fillna(0)
| cloud_cover | apparent_temperature | temperature | humidity | dew_point | wind_bearing | wind_speed | wind_chill | wind_gust | heat_index | ... | pop | fctcode | ozone | precip_accumulation | precip_intensity | precip_probability | precip_type | visibility | icon | summary | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| datetime_local | |||||||||||||||||||||
| 2017-10-01 00:00:00 | 0.00 | 26.52 | 26 | 1 | 19.46 | 308.0 | 0.83 | 0.0 | 0.00 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.451355 | 3.57 | 0.225788 | 0.408767 |
| 2017-10-01 01:00:00 | 0.00 | 28.78 | 27 | 1 | 20.06 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.451355 | 3.06 | 0.214406 | 0.214259 |
| 2017-10-01 02:00:00 | 0.00 | 26.58 | 26 | 1 | 19.97 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.451355 | 3.06 | 0.214406 | 0.214259 |
| 2017-10-01 03:00:00 | 0.00 | 23.84 | 23 | 1 | 18.66 | 270.0 | 0.45 | 0.0 | 0.00 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.451355 | 3.56 | 0.225788 | 0.408767 |
| 2017-10-01 04:00:00 | 0.00 | 24.13 | 24 | 1 | 17.84 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.451355 | 3.06 | 0.214406 | 0.214259 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2019-09-30 19:00:00 | 0.16 | 31.66 | 29 | 1 | 21.19 | 40.0 | 2.72 | 0.0 | 4.59 | 0.0 | ... | 0.0 | 0.0 | 269.6 | 0.0 | 0.0 | 0.0 | 0.000000 | 16.09 | 0.107056 | 0.086570 |
| 2019-09-30 20:00:00 | 0.16 | 30.42 | 28 | 1 | 21.35 | 59.0 | 3.24 | 0.0 | 5.45 | 0.0 | ... | 0.0 | 0.0 | 268.7 | 0.0 | 0.0 | 0.0 | 0.000000 | 16.09 | 0.107056 | 0.086570 |
| 2019-09-30 21:00:00 | 0.18 | 29.56 | 27 | 1 | 21.61 | 76.0 | 3.34 | 0.0 | 5.83 | 0.0 | ... | 0.0 | 0.0 | 268.4 | 0.0 | 0.0 | 0.0 | 0.000000 | 16.09 | 0.107056 | 0.086570 |
| 2019-09-30 22:00:00 | 0.30 | 28.97 | 27 | 1 | 21.99 | 88.0 | 3.22 | 0.0 | 5.73 | 0.0 | ... | 0.0 | 0.0 | 268.7 | 0.0 | 0.0 | 0.0 | 0.000000 | 16.09 | 0.107056 | 0.086570 |
| 2019-09-30 23:00:00 | 0.35 | 28.15 | 26 | 1 | 22.09 | 95.0 | 3.14 | 0.0 | 6.03 | 0.0 | ... | 0.0 | 0.0 | 268.7 | 0.0 | 0.0 | 0.0 | 0.000000 | 16.09 | 0.107056 | 0.086570 |
13619 rows × 24 columns
df_weather.describe()
| cloud_cover | apparent_temperature | temperature | humidity | dew_point | wind_bearing | wind_speed | wind_chill | wind_gust | heat_index | ... | pop | fctcode | ozone | precip_accumulation | precip_intensity | precip_probability | precip_type | visibility | icon | summary | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 13191.000000 | 13619.000000 | 13619.000000 | 13619.000000 | 13619.000000 | 12700.000000 | 12879.000000 | 0.0 | 10355.000000 | 0.0 | ... | 0.0 | 0.0 | 10355.000000 | 0.0 | 10355.000000 | 10355.000000 | 6490.000000 | 13443.000000 | 13619.000000 | 13619.000000 |
| mean | 0.237562 | 25.944639 | 24.498054 | 0.537411 | 11.924626 | 188.786929 | 2.638890 | NaN | 4.245261 | NaN | ... | NaN | NaN | 274.459707 | NaN | 0.058259 | 0.018400 | 0.428831 | 12.290986 | 0.182920 | 0.234222 |
| std | 0.342890 | 10.469808 | 8.840016 | 0.498617 | 9.129459 | 110.879924 | 1.290817 | NaN | 2.177424 | NaN | ... | NaN | NaN | 15.247242 | NaN | 0.251135 | 0.055627 | 0.095356 | 6.211356 | 0.053470 | 0.156872 |
| min | 0.000000 | 3.140000 | 5.000000 | 0.000000 | -20.560000 | 0.000000 | 0.000000 | NaN | 0.630000 | NaN | ... | NaN | NaN | 225.800000 | NaN | 0.000000 | 0.000000 | 0.025185 | 0.000000 | 0.000147 | 0.000073 |
| 25% | 0.000000 | 17.310000 | 17.000000 | 0.000000 | 5.920000 | 91.000000 | 1.760000 | NaN | 2.695000 | NaN | ... | NaN | NaN | 267.800000 | NaN | 0.000000 | 0.000000 | 0.451355 | 3.670000 | 0.129819 | 0.060651 |
| 50% | 0.010000 | 25.540000 | 25.000000 | 1.000000 | 10.710000 | 177.000000 | 2.520000 | NaN | 3.740000 | NaN | ... | NaN | NaN | 275.600000 | NaN | 0.000000 | 0.000000 | 0.451355 | 16.090000 | 0.214406 | 0.214259 |
| 75% | 0.430000 | 34.580000 | 31.000000 | 1.000000 | 19.670000 | 298.000000 | 3.400000 | NaN | 5.340000 | NaN | ... | NaN | NaN | 283.900000 | NaN | 0.010000 | 0.010000 | 0.451355 | 16.090000 | 0.220060 | 0.408767 |
| max | 1.000000 | 49.750000 | 47.000000 | 1.000000 | 28.350000 | 359.000000 | 9.730000 | NaN | 15.150000 | NaN | ... | NaN | NaN | 351.700000 | NaN | 6.220000 | 0.730000 | 0.451355 | 16.090000 | 0.225788 | 0.408767 |
8 rows × 24 columns
df_weather.isnull().sum()
cloud_cover 428 apparent_temperature 0 temperature 0 humidity 0 dew_point 0 wind_bearing 919 wind_speed 740 wind_chill 13619 wind_gust 3264 heat_index 13619 pressure 2156 qpf 13619 uv_index 3264 snow 13619 pop 13619 fctcode 13619 ozone 3264 precip_accumulation 13619 precip_intensity 3264 precip_probability 3264 precip_type 7129 visibility 176 icon 0 summary 0 dtype: int64
df_weather.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 13619 entries, 2017-10-01 00:00:00 to 2019-09-30 23:00:00 Data columns (total 24 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 cloud_cover 13191 non-null float64 1 apparent_temperature 13619 non-null float64 2 temperature 13619 non-null int64 3 humidity 13619 non-null int64 4 dew_point 13619 non-null float64 5 wind_bearing 12700 non-null float64 6 wind_speed 12879 non-null float64 7 wind_chill 0 non-null float64 8 wind_gust 10355 non-null float64 9 heat_index 0 non-null float64 10 pressure 11463 non-null float64 11 qpf 0 non-null float64 12 uv_index 10355 non-null float64 13 snow 0 non-null float64 14 pop 0 non-null float64 15 fctcode 0 non-null float64 16 ozone 10355 non-null float64 17 precip_accumulation 0 non-null float64 18 precip_intensity 10355 non-null float64 19 precip_probability 10355 non-null float64 20 precip_type 6490 non-null float64 21 visibility 13443 non-null float64 22 icon 13619 non-null float64 23 summary 13619 non-null float64 dtypes: float64(22), int64(2) memory usage: 2.6 MB
f, ax = plt.subplots(figsize=(16,9))
sns.heatmap(df_weather.corr(),cmap="YlGnBu",annot=True)
<AxesSubplot:>
df_weather.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 13619 entries, 2017-10-01 00:00:00 to 2019-09-30 23:00:00 Data columns (total 24 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 cloud_cover 13191 non-null float64 1 apparent_temperature 13619 non-null float64 2 temperature 13619 non-null int64 3 humidity 13619 non-null int64 4 dew_point 13619 non-null float64 5 wind_bearing 12700 non-null float64 6 wind_speed 12879 non-null float64 7 wind_chill 0 non-null float64 8 wind_gust 10355 non-null float64 9 heat_index 0 non-null float64 10 pressure 11463 non-null float64 11 qpf 0 non-null float64 12 uv_index 10355 non-null float64 13 snow 0 non-null float64 14 pop 0 non-null float64 15 fctcode 0 non-null float64 16 ozone 10355 non-null float64 17 precip_accumulation 0 non-null float64 18 precip_intensity 10355 non-null float64 19 precip_probability 10355 non-null float64 20 precip_type 6490 non-null float64 21 visibility 13443 non-null float64 22 icon 13619 non-null float64 23 summary 13619 non-null float64 dtypes: float64(22), int64(2) memory usage: 2.6 MB
df1=df_weather.copy()
df1.fillna(0,inplace=True)
df1.drop(['precip_accumulation','fctcode','pop','snow','qpf','heat_index','wind_chill'],inplace=True,axis=1)
df1.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 13619 entries, 2017-10-01 00:00:00 to 2019-09-30 23:00:00 Data columns (total 17 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 cloud_cover 13619 non-null float64 1 apparent_temperature 13619 non-null float64 2 temperature 13619 non-null int64 3 humidity 13619 non-null int64 4 dew_point 13619 non-null float64 5 wind_bearing 13619 non-null float64 6 wind_speed 13619 non-null float64 7 wind_gust 13619 non-null float64 8 pressure 13619 non-null float64 9 uv_index 13619 non-null float64 10 ozone 13619 non-null float64 11 precip_intensity 13619 non-null float64 12 precip_probability 13619 non-null float64 13 precip_type 13619 non-null float64 14 visibility 13619 non-null float64 15 icon 13619 non-null float64 16 summary 13619 non-null float64 dtypes: float64(15), int64(2) memory usage: 1.9 MB
df1 = df1.resample('1H').pad()
df1.head()
| cloud_cover | apparent_temperature | temperature | humidity | dew_point | wind_bearing | wind_speed | wind_gust | pressure | uv_index | ozone | precip_intensity | precip_probability | precip_type | visibility | icon | summary | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| datetime_local | |||||||||||||||||
| 2017-10-01 00:00:00 | 0.0 | 26.52 | 26 | 1 | 19.46 | 308.0 | 0.83 | 0.0 | 1007.90 | 0.0 | 0.0 | 0.0 | 0.0 | 0.451355 | 3.57 | 0.225788 | 0.408767 |
| 2017-10-01 01:00:00 | 0.0 | 28.78 | 27 | 1 | 20.06 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.451355 | 3.06 | 0.214406 | 0.214259 |
| 2017-10-01 02:00:00 | 0.0 | 26.58 | 26 | 1 | 19.97 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.451355 | 3.06 | 0.214406 | 0.214259 |
| 2017-10-01 03:00:00 | 0.0 | 23.84 | 23 | 1 | 18.66 | 270.0 | 0.45 | 0.0 | 1008.12 | 0.0 | 0.0 | 0.0 | 0.0 | 0.451355 | 3.56 | 0.225788 | 0.408767 |
| 2017-10-01 04:00:00 | 0.0 | 24.13 | 24 | 1 | 17.84 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.451355 | 3.06 | 0.214406 | 0.214259 |
df2= pd.concat([df,df1],axis=1)
df2.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 17520 entries, 2017-10-01 00:00:00 to 2019-09-30 23:00:00 Freq: H Data columns (total 18 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 power 17520 non-null float64 1 cloud_cover 17520 non-null float64 2 apparent_temperature 17520 non-null float64 3 temperature 17520 non-null int64 4 humidity 17520 non-null int64 5 dew_point 17520 non-null float64 6 wind_bearing 17520 non-null float64 7 wind_speed 17520 non-null float64 8 wind_gust 17520 non-null float64 9 pressure 17520 non-null float64 10 uv_index 17520 non-null float64 11 ozone 17520 non-null float64 12 precip_intensity 17520 non-null float64 13 precip_probability 17520 non-null float64 14 precip_type 17520 non-null float64 15 visibility 17520 non-null float64 16 icon 17520 non-null float64 17 summary 17520 non-null float64 dtypes: float64(16), int64(2) memory usage: 3.2 MB
df2.describe(include='all')
| power | cloud_cover | apparent_temperature | temperature | humidity | dew_point | wind_bearing | wind_speed | wind_gust | pressure | uv_index | ozone | precip_intensity | precip_probability | precip_type | visibility | icon | summary | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 17520.000000 | 17520.000000 | 17520.000000 | 17520.000000 | 17520.000000 | 17520.000000 | 17520.000000 | 17520.000000 | 17520.000000 | 17520.000000 | 17520.000000 | 17520.000000 | 17520.000000 | 17520.000000 | 17520.000000 | 17520.000000 | 17520.000000 | 17520.000000 |
| mean | 28.612915 | 0.178863 | 25.245933 | 24.145320 | 0.427340 | 10.444118 | 211.535902 | 2.268933 | 3.176858 | 885.228147 | 1.105993 | 228.155667 | 0.034433 | 0.010875 | 0.164461 | 9.430807 | 0.192465 | 0.273086 |
| std | 487.944707 | 0.314675 | 9.325530 | 7.824069 | 0.494706 | 8.536539 | 122.691590 | 1.297273 | 2.321461 | 331.679424 | 2.430003 | 110.158910 | 0.195179 | 0.043711 | 0.211153 | 7.521820 | 0.050404 | 0.156213 |
| min | 0.000000 | 0.000000 | 3.140000 | 5.000000 | 0.000000 | -20.560000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000147 | 0.000073 |
| 25% | 0.000000 | 0.000000 | 19.770000 | 20.000000 | 0.000000 | 4.980000 | 97.000000 | 1.490000 | 1.920000 | 1000.740000 | 0.000000 | 254.800000 | 0.000000 | 0.000000 | 0.000000 | 0.640000 | 0.129819 | 0.086570 |
| 50% | 0.000000 | 0.000000 | 22.910000 | 23.000000 | 0.000000 | 7.880000 | 260.000000 | 1.990000 | 3.070000 | 1010.460000 | 0.000000 | 276.500000 | 0.000000 | 0.000000 | 0.025185 | 16.090000 | 0.220060 | 0.408767 |
| 75% | 5.852500 | 0.230000 | 32.152500 | 30.000000 | 1.000000 | 16.290000 | 335.000000 | 3.050000 | 4.120000 | 1012.980000 | 0.000000 | 294.705000 | 0.000000 | 0.000000 | 0.451355 | 16.090000 | 0.225788 | 0.408767 |
| max | 15091.685000 | 1.000000 | 49.750000 | 47.000000 | 1.000000 | 28.350000 | 359.000000 | 9.730000 | 15.150000 | 1025.690000 | 12.000000 | 351.700000 | 6.220000 | 0.730000 | 0.451355 | 16.090000 | 0.225788 | 0.408767 |
df2.head()
| power | cloud_cover | apparent_temperature | temperature | humidity | dew_point | wind_bearing | wind_speed | wind_gust | pressure | uv_index | ozone | precip_intensity | precip_probability | precip_type | visibility | icon | summary | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| datetime | ||||||||||||||||||
| 2017-10-01 00:00:00 | 0.0 | 0.0 | 26.52 | 26 | 1 | 19.46 | 308.0 | 0.83 | 0.0 | 1007.90 | 0.0 | 0.0 | 0.0 | 0.0 | 0.451355 | 3.57 | 0.225788 | 0.408767 |
| 2017-10-01 01:00:00 | 0.0 | 0.0 | 28.78 | 27 | 1 | 20.06 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.451355 | 3.06 | 0.214406 | 0.214259 |
| 2017-10-01 02:00:00 | 0.0 | 0.0 | 26.58 | 26 | 1 | 19.97 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.451355 | 3.06 | 0.214406 | 0.214259 |
| 2017-10-01 03:00:00 | 0.0 | 0.0 | 23.84 | 23 | 1 | 18.66 | 270.0 | 0.45 | 0.0 | 1008.12 | 0.0 | 0.0 | 0.0 | 0.0 | 0.451355 | 3.56 | 0.225788 | 0.408767 |
| 2017-10-01 04:00:00 | 0.0 | 0.0 | 24.13 | 24 | 1 | 17.84 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.451355 | 3.06 | 0.214406 | 0.214259 |
df2.to_csv('final_cleaned_data.csv')
pd.date_range(start = '2017-10-01', end = '2019-09-30' ).difference(df_weather.index)
DatetimeIndex(['2018-02-28', '2018-03-01', '2018-03-02', '2018-03-03',
'2018-03-04', '2018-03-05', '2018-03-06', '2018-03-13',
'2018-03-14', '2018-03-15',
...
'2018-08-06', '2018-08-07', '2018-08-08', '2018-08-09',
'2018-08-10', '2018-08-11', '2018-08-12', '2018-08-13',
'2018-08-14', '2018-08-15'],
dtype='datetime64[ns]', length=163, freq=None)
f, ax = plt.subplots(figsize=(16,9))
sns.heatmap(df2.corr(),cmap="YlGnBu",annot=True)
<AxesSubplot:>
energy_series = df2.loc[:, ('summary')]
energy_series.plot()
<AxesSubplot:xlabel='datetime'>
temperature_series=df2.loc[:,('temperature')]
temperature_series.plot()
<AxesSubplot:xlabel='datetime'>
power_series=df2.loc[:,('power')]
power_series.plot()
<AxesSubplot:xlabel='datetime'>
df_weather_forecast=pd.read_csv('weather_forecast.csv')
df_weather_forecast=df_weather_forecast.drop(['datetime_utc','plant_id','Unnamed: 0','updated_at','sunset','sunrise','plant_id'], axis=1)
df_weather_forecast['datetime_local']=pd.to_datetime(df_weather_forecast['datetime_local'])
df_weather_forecast.set_index('datetime_local',inplace=True)
pd.date_range(start = '2019-10-01', end = '2019-10-27' ).difference(df_weather_forecast.index)
DatetimeIndex([], dtype='datetime64[ns]', freq=None)
df_weather_forecast.drop(['precip_accumulation','fctcode','pop','snow','qpf','heat_index','wind_chill'],inplace=True,axis=1)
df_weather_forecast.shape
(648, 17)
df_weather_forecast['precip_type'].value_counts()
rain 114 Name: precip_type, dtype: int64
df_weather_forecast.fillna(0,axis=1,inplace=True)
df_weather_forecast.isnull().sum()
cloud_cover 0 apparent_temperature 0 temperature 0 humidity 0 dew_point 0 wind_bearing 0 wind_speed 0 wind_gust 0 pressure 0 uv_index 0 ozone 0 precip_intensity 0 precip_probability 0 precip_type 0 visibility 0 icon 0 summary 0 dtype: int64
df_weather_forecast.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 648 entries, 2019-10-01 00:00:00 to 2019-10-27 23:00:00 Data columns (total 17 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 cloud_cover 648 non-null float64 1 apparent_temperature 648 non-null float64 2 temperature 648 non-null int64 3 humidity 648 non-null int64 4 dew_point 648 non-null float64 5 wind_bearing 648 non-null int64 6 wind_speed 648 non-null float64 7 wind_gust 648 non-null float64 8 pressure 648 non-null float64 9 uv_index 648 non-null int64 10 ozone 648 non-null float64 11 precip_intensity 648 non-null float64 12 precip_probability 648 non-null float64 13 precip_type 648 non-null object 14 visibility 648 non-null float64 15 icon 648 non-null object 16 summary 648 non-null object dtypes: float64(10), int64(4), object(3) memory usage: 91.1+ KB
se = df_weather_forecast.groupby('icon').size()/len(df_weather_forecast)
df_weather_forecast.loc[:,'icon']=df_weather_forecast['icon'].map(se)
se_s = df_weather_forecast.groupby('summary').size()/len(df_weather_forecast)
df_weather_forecast.loc[:,'summary']=df_weather_forecast['summary'].map(se_s)
se_p = df_weather_forecast.groupby('precip_type').size()/len(df_weather_forecast)
df_weather_forecast.loc[:,'precip_type']=df_weather_forecast['precip_type'].map(se_p)
df_weather_forecast.head()
| cloud_cover | apparent_temperature | temperature | humidity | dew_point | wind_bearing | wind_speed | wind_gust | pressure | uv_index | ozone | precip_intensity | precip_probability | precip_type | visibility | icon | summary | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| datetime_local | |||||||||||||||||
| 2019-10-01 00:00:00 | 0.35 | 26.41 | 26 | 1 | 22.01 | 101 | 3.08 | 6.42 | 1008.45 | 0 | 268.6 | 0.0 | 0.0 | 0.824074 | 16.09 | 0.098765 | 0.112654 |
| 2019-10-01 01:00:00 | 0.43 | 25.95 | 25 | 1 | 22.06 | 105 | 2.94 | 6.28 | 1008.24 | 0 | 269.1 | 0.0 | 0.0 | 0.824074 | 16.09 | 0.098765 | 0.112654 |
| 2019-10-01 02:00:00 | 0.48 | 25.46 | 25 | 1 | 22.06 | 106 | 2.82 | 6.08 | 1007.90 | 0 | 269.7 | 0.0 | 0.0 | 0.824074 | 16.09 | 0.098765 | 0.112654 |
| 2019-10-01 03:00:00 | 0.49 | 24.80 | 24 | 1 | 21.87 | 106 | 2.63 | 5.74 | 1007.61 | 0 | 270.4 | 0.0 | 0.0 | 0.824074 | 16.09 | 0.098765 | 0.112654 |
| 2019-10-01 04:00:00 | 0.49 | 23.99 | 23 | 1 | 21.53 | 100 | 2.39 | 5.30 | 1007.51 | 0 | 271.3 | 0.0 | 0.0 | 0.824074 | 16.09 | 0.098765 | 0.112654 |
df_weather_forecast['power']=''
df_weather_forecast['power']=pd.to_numeric(df_weather_forecast['power'],errors='coerce')
df_weather_forecast.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 648 entries, 2019-10-01 00:00:00 to 2019-10-27 23:00:00 Data columns (total 18 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 cloud_cover 648 non-null float64 1 apparent_temperature 648 non-null float64 2 temperature 648 non-null int64 3 humidity 648 non-null int64 4 dew_point 648 non-null float64 5 wind_bearing 648 non-null int64 6 wind_speed 648 non-null float64 7 wind_gust 648 non-null float64 8 pressure 648 non-null float64 9 uv_index 648 non-null int64 10 ozone 648 non-null float64 11 precip_intensity 648 non-null float64 12 precip_probability 648 non-null float64 13 precip_type 648 non-null float64 14 visibility 648 non-null float64 15 icon 648 non-null float64 16 summary 648 non-null float64 17 power 0 non-null float64 dtypes: float64(14), int64(4) memory usage: 96.2 KB
df2.head()
| power | cloud_cover | apparent_temperature | temperature | humidity | dew_point | wind_bearing | wind_speed | wind_gust | pressure | uv_index | ozone | precip_intensity | precip_probability | precip_type | visibility | icon | summary | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| datetime | ||||||||||||||||||
| 2017-10-01 00:00:00 | 0.0 | 0.0 | 26.52 | 26 | 1 | 19.46 | 308.0 | 0.83 | 0.0 | 1007.90 | 0.0 | 0.0 | 0.0 | 0.0 | 0.451355 | 3.57 | 0.225788 | 0.408767 |
| 2017-10-01 01:00:00 | 0.0 | 0.0 | 28.78 | 27 | 1 | 20.06 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.451355 | 3.06 | 0.214406 | 0.214259 |
| 2017-10-01 02:00:00 | 0.0 | 0.0 | 26.58 | 26 | 1 | 19.97 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.451355 | 3.06 | 0.214406 | 0.214259 |
| 2017-10-01 03:00:00 | 0.0 | 0.0 | 23.84 | 23 | 1 | 18.66 | 270.0 | 0.45 | 0.0 | 1008.12 | 0.0 | 0.0 | 0.0 | 0.0 | 0.451355 | 3.56 | 0.225788 | 0.408767 |
| 2017-10-01 04:00:00 | 0.0 | 0.0 | 24.13 | 24 | 1 | 17.84 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.451355 | 3.06 | 0.214406 | 0.214259 |
df2.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 17520 entries, 2017-10-01 00:00:00 to 2019-09-30 23:00:00 Freq: H Data columns (total 18 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 power 17520 non-null float64 1 cloud_cover 17520 non-null float64 2 apparent_temperature 17520 non-null float64 3 temperature 17520 non-null int64 4 humidity 17520 non-null int64 5 dew_point 17520 non-null float64 6 wind_bearing 17520 non-null float64 7 wind_speed 17520 non-null float64 8 wind_gust 17520 non-null float64 9 pressure 17520 non-null float64 10 uv_index 17520 non-null float64 11 ozone 17520 non-null float64 12 precip_intensity 17520 non-null float64 13 precip_probability 17520 non-null float64 14 precip_type 17520 non-null float64 15 visibility 17520 non-null float64 16 icon 17520 non-null float64 17 summary 17520 non-null float64 dtypes: float64(16), int64(2) memory usage: 3.2 MB
train_data=df2.loc['2017-10-01':'2019-07-30']
val_data=df2.loc['2019-08-01':'2019-08-31']
test_data=df2.loc['2019-08-01':'2019-09-30']
from sklearn.preprocessing import MinMaxScaler, StandardScaler
clean_train_data=train_data.copy()
clean_val_data=val_data.copy()
clean_test_data=test_data.copy()
x_train,y_train=clean_train_data.drop(['power'],axis=1),clean_train_data.power
x_test, y_test=clean_test_data.drop(['power'],axis=1),clean_test_data.power
x_val, y_val=clean_val_data.drop(['power'],axis=1),clean_val_data.power
x_test.shape
(1464, 17)
x_train.shape
(16032, 17)
x_val.shape
(744, 17)
scaler=StandardScaler()
x_train_scaled=scaler.fit_transform(x_train)
x_test_scaled=scaler.fit_transform(x_test)
x_val_scaled=scaler.fit_transform(x_val)
x_train_reshaped=x_train_scaled.reshape((x_train_scaled.shape[0],1,x_train_scaled.shape[1]))
x_val_reshaped=x_val_scaled.reshape((x_val_scaled.shape[0],1,x_val_scaled.shape[1]))
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(25, input_shape= (1,17), return_sequences=True))
model.add(tf.keras.layers.ReLU())
model.add(tf.keras.layers.LSTM(25, return_sequences=True))
model.add(tf.keras.layers.ReLU())
model.add(tf.keras.layers.Dropout(0.1))
model.add(tf.keras.layers.LSTM(5, return_sequences=False))
model.add(tf.keras.layers.Dropout(0.1))
model.add(tf.keras.layers.Dense(3))
model.summary()
Model: "sequential_14" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= lstm_31 (LSTM) (None, 1, 25) 4300 _________________________________________________________________ re_lu_18 (ReLU) (None, 1, 25) 0 _________________________________________________________________ lstm_32 (LSTM) (None, 1, 25) 5100 _________________________________________________________________ re_lu_19 (ReLU) (None, 1, 25) 0 _________________________________________________________________ dropout_18 (Dropout) (None, 1, 25) 0 _________________________________________________________________ lstm_33 (LSTM) (None, 5) 620 _________________________________________________________________ dropout_19 (Dropout) (None, 5) 0 _________________________________________________________________ dense_13 (Dense) (None, 3) 18 ================================================================= Total params: 10,038 Trainable params: 10,038 Non-trainable params: 0 _________________________________________________________________
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
patience=2,
mode='min')
model.compile(loss=tf.losses.MeanSquaredError(),
optimizer=tf.optimizers.Adam(lr=0.001),
metrics=[tf.keras.metrics.MeanAbsoluteError()])
history = model.fit(x_train_reshaped,y_train, epochs=200,
validation_data=(x_val_reshaped,y_val),
shuffle=False)
Epoch 1/200 501/501 [==============================] - 3s 6ms/step - loss: 260931.2188 - mean_absolute_error: 31.2826 - val_loss: 17.9854 - val_mean_absolute_error: 3.0966 Epoch 2/200 501/501 [==============================] - 2s 3ms/step - loss: 260887.4219 - mean_absolute_error: 31.4072 - val_loss: 15.9234 - val_mean_absolute_error: 2.5712 Epoch 3/200 501/501 [==============================] - 2s 3ms/step - loss: 260859.0000 - mean_absolute_error: 31.2808 - val_loss: 15.1265 - val_mean_absolute_error: 2.6614 Epoch 4/200 501/501 [==============================] - 2s 3ms/step - loss: 260848.0156 - mean_absolute_error: 31.2866 - val_loss: 14.5729 - val_mean_absolute_error: 2.6283 Epoch 5/200 501/501 [==============================] - 2s 3ms/step - loss: 260834.9062 - mean_absolute_error: 31.2321 - val_loss: 14.0840 - val_mean_absolute_error: 2.5846 Epoch 6/200 501/501 [==============================] - 2s 3ms/step - loss: 260826.0000 - mean_absolute_error: 31.1672 - val_loss: 13.5341 - val_mean_absolute_error: 2.5088 Epoch 7/200 501/501 [==============================] - 2s 3ms/step - loss: 260805.5000 - mean_absolute_error: 31.0918 - val_loss: 13.0282 - val_mean_absolute_error: 2.4466 Epoch 8/200 501/501 [==============================] - 2s 3ms/step - loss: 260810.3125 - mean_absolute_error: 31.0358 - val_loss: 12.5177 - val_mean_absolute_error: 2.3698 Epoch 9/200 501/501 [==============================] - 2s 4ms/step - loss: 260787.3438 - mean_absolute_error: 30.9066 - val_loss: 12.1077 - val_mean_absolute_error: 2.3366 Epoch 10/200 501/501 [==============================] - 2s 3ms/step - loss: 260779.6875 - mean_absolute_error: 31.0137 - val_loss: 12.0622 - val_mean_absolute_error: 2.3720 Epoch 11/200 501/501 [==============================] - 2s 3ms/step - loss: 260761.4219 - mean_absolute_error: 30.9100 - val_loss: 11.6788 - val_mean_absolute_error: 2.3484 Epoch 12/200 501/501 [==============================] - 2s 3ms/step - loss: 260757.4844 - mean_absolute_error: 30.8369 - val_loss: 11.3210 - val_mean_absolute_error: 2.3158 Epoch 13/200 501/501 [==============================] - 2s 3ms/step - loss: 260733.6562 - mean_absolute_error: 30.8808 - val_loss: 11.0592 - val_mean_absolute_error: 2.2873 Epoch 14/200 501/501 [==============================] - 2s 3ms/step - loss: 260730.1250 - mean_absolute_error: 30.7857 - val_loss: 10.6745 - val_mean_absolute_error: 2.2449 Epoch 15/200 501/501 [==============================] - 2s 3ms/step - loss: 260730.0000 - mean_absolute_error: 30.9193 - val_loss: 10.6999 - val_mean_absolute_error: 2.1791 Epoch 16/200 501/501 [==============================] - 2s 3ms/step - loss: 260702.2812 - mean_absolute_error: 30.7441 - val_loss: 10.3054 - val_mean_absolute_error: 2.1640 Epoch 17/200 501/501 [==============================] - 2s 3ms/step - loss: 260702.4688 - mean_absolute_error: 30.7745 - val_loss: 10.0462 - val_mean_absolute_error: 2.1035 Epoch 18/200 501/501 [==============================] - 2s 3ms/step - loss: 260691.8281 - mean_absolute_error: 30.6575 - val_loss: 9.9316 - val_mean_absolute_error: 2.1207 Epoch 19/200 501/501 [==============================] - 2s 3ms/step - loss: 260669.3281 - mean_absolute_error: 30.8484 - val_loss: 9.9837 - val_mean_absolute_error: 2.0980 Epoch 20/200 501/501 [==============================] - 2s 4ms/step - loss: 260665.6094 - mean_absolute_error: 30.7038 - val_loss: 9.7313 - val_mean_absolute_error: 2.0722 Epoch 21/200 501/501 [==============================] - 2s 3ms/step - loss: 260649.4219 - mean_absolute_error: 30.6170 - val_loss: 9.5950 - val_mean_absolute_error: 2.0296 Epoch 22/200 501/501 [==============================] - 2s 3ms/step - loss: 260650.6875 - mean_absolute_error: 30.6658 - val_loss: 9.5892 - val_mean_absolute_error: 2.0110 Epoch 23/200 501/501 [==============================] - 2s 3ms/step - loss: 260628.5000 - mean_absolute_error: 30.5442 - val_loss: 9.5662 - val_mean_absolute_error: 2.0093 Epoch 24/200 501/501 [==============================] - 2s 3ms/step - loss: 260634.4375 - mean_absolute_error: 30.6583 - val_loss: 9.5507 - val_mean_absolute_error: 1.9526 Epoch 25/200 501/501 [==============================] - 2s 3ms/step - loss: 260620.0938 - mean_absolute_error: 30.5163 - val_loss: 9.6918 - val_mean_absolute_error: 1.9938 Epoch 26/200 501/501 [==============================] - 2s 4ms/step - loss: 260614.3125 - mean_absolute_error: 30.7110 - val_loss: 10.0643 - val_mean_absolute_error: 1.9689 Epoch 27/200 501/501 [==============================] - 2s 3ms/step - loss: 260596.2188 - mean_absolute_error: 30.5305 - val_loss: 9.9026 - val_mean_absolute_error: 1.9662 Epoch 28/200 501/501 [==============================] - 2s 3ms/step - loss: 260579.6562 - mean_absolute_error: 30.5251 - val_loss: 10.0247 - val_mean_absolute_error: 1.9512 Epoch 29/200 501/501 [==============================] - 2s 4ms/step - loss: 260568.6094 - mean_absolute_error: 30.4260 - val_loss: 10.1755 - val_mean_absolute_error: 1.9687 Epoch 30/200 501/501 [==============================] - 2s 3ms/step - loss: 260547.0938 - mean_absolute_error: 30.5772 - val_loss: 10.2675 - val_mean_absolute_error: 1.9342 Epoch 31/200 501/501 [==============================] - 2s 4ms/step - loss: 260547.6094 - mean_absolute_error: 30.4336 - val_loss: 10.4829 - val_mean_absolute_error: 1.9665 Epoch 32/200 501/501 [==============================] - 2s 3ms/step - loss: 260520.2656 - mean_absolute_error: 30.6396 - val_loss: 11.1896 - val_mean_absolute_error: 1.9746 Epoch 33/200 501/501 [==============================] - 2s 3ms/step - loss: 260518.2812 - mean_absolute_error: 30.4520 - val_loss: 11.1014 - val_mean_absolute_error: 1.9753 Epoch 34/200 501/501 [==============================] - 2s 3ms/step - loss: 260517.1094 - mean_absolute_error: 30.4434 - val_loss: 11.2337 - val_mean_absolute_error: 1.9803 Epoch 35/200 501/501 [==============================] - 2s 3ms/step - loss: 260515.5000 - mean_absolute_error: 30.4075 - val_loss: 11.5291 - val_mean_absolute_error: 2.0131 Epoch 36/200 501/501 [==============================] - 2s 3ms/step - loss: 260513.7812 - mean_absolute_error: 30.4272 - val_loss: 11.7498 - val_mean_absolute_error: 1.9960 Epoch 37/200 501/501 [==============================] - 2s 4ms/step - loss: 260500.3906 - mean_absolute_error: 30.3956 - val_loss: 12.0402 - val_mean_absolute_error: 2.0182 Epoch 38/200 501/501 [==============================] - 2s 4ms/step - loss: 260453.5938 - mean_absolute_error: 30.3968 - val_loss: 12.6304 - val_mean_absolute_error: 2.0411 Epoch 39/200 501/501 [==============================] - 2s 4ms/step - loss: 260449.0469 - mean_absolute_error: 30.4836 - val_loss: 12.6347 - val_mean_absolute_error: 2.0407 Epoch 40/200 501/501 [==============================] - 2s 4ms/step - loss: 260462.2344 - mean_absolute_error: 30.3912 - val_loss: 13.0867 - val_mean_absolute_error: 2.1056 Epoch 41/200 501/501 [==============================] - 2s 3ms/step - loss: 260433.1406 - mean_absolute_error: 30.6067 - val_loss: 13.6718 - val_mean_absolute_error: 2.0818 Epoch 42/200 501/501 [==============================] - 2s 3ms/step - loss: 260426.2188 - mean_absolute_error: 30.4567 - val_loss: 13.4016 - val_mean_absolute_error: 2.0815 Epoch 43/200 501/501 [==============================] - 2s 3ms/step - loss: 260414.0312 - mean_absolute_error: 30.4203 - val_loss: 13.4936 - val_mean_absolute_error: 2.0911 Epoch 44/200 501/501 [==============================] - 2s 3ms/step - loss: 260412.7969 - mean_absolute_error: 30.4120 - val_loss: 13.7332 - val_mean_absolute_error: 2.0947 Epoch 45/200 501/501 [==============================] - 2s 3ms/step - loss: 260395.1875 - mean_absolute_error: 30.3690 - val_loss: 14.1532 - val_mean_absolute_error: 2.1330 Epoch 46/200 501/501 [==============================] - 2s 3ms/step - loss: 260388.3594 - mean_absolute_error: 30.3801 - val_loss: 14.2362 - val_mean_absolute_error: 2.1438 Epoch 47/200 501/501 [==============================] - 2s 4ms/step - loss: 260391.9531 - mean_absolute_error: 30.3772 - val_loss: 14.6372 - val_mean_absolute_error: 2.1821 Epoch 48/200 501/501 [==============================] - 2s 3ms/step - loss: 260377.9688 - mean_absolute_error: 30.4046 - val_loss: 14.9736 - val_mean_absolute_error: 2.1950 Epoch 49/200 501/501 [==============================] - 2s 3ms/step - loss: 260331.8438 - mean_absolute_error: 30.3602 - val_loss: 15.5592 - val_mean_absolute_error: 2.3098 Epoch 50/200 501/501 [==============================] - 2s 3ms/step - loss: 260365.8281 - mean_absolute_error: 30.4893 - val_loss: 15.1093 - val_mean_absolute_error: 2.1826 Epoch 51/200 501/501 [==============================] - 2s 3ms/step - loss: 260341.1094 - mean_absolute_error: 30.3557 - val_loss: 15.7143 - val_mean_absolute_error: 2.2603 Epoch 52/200 501/501 [==============================] - 2s 3ms/step - loss: 260312.3750 - mean_absolute_error: 30.4428 - val_loss: 16.3912 - val_mean_absolute_error: 2.3383 Epoch 53/200 501/501 [==============================] - 2s 3ms/step - loss: 260327.2188 - mean_absolute_error: 30.3505 - val_loss: 16.6313 - val_mean_absolute_error: 2.4071 Epoch 54/200 501/501 [==============================] - 2s 4ms/step - loss: 260346.1250 - mean_absolute_error: 30.5651 - val_loss: 16.4434 - val_mean_absolute_error: 2.2893 Epoch 55/200 501/501 [==============================] - 2s 3ms/step - loss: 260303.2344 - mean_absolute_error: 30.3957 - val_loss: 17.0627 - val_mean_absolute_error: 2.3719 Epoch 56/200 501/501 [==============================] - 2s 4ms/step - loss: 260292.5625 - mean_absolute_error: 30.4464 - val_loss: 17.1917 - val_mean_absolute_error: 2.4154 Epoch 57/200 501/501 [==============================] - 2s 4ms/step - loss: 260288.1406 - mean_absolute_error: 30.3917 - val_loss: 17.1186 - val_mean_absolute_error: 2.4311 Epoch 58/200 501/501 [==============================] - 2s 4ms/step - loss: 260249.1719 - mean_absolute_error: 30.4172 - val_loss: 17.2994 - val_mean_absolute_error: 2.4575 Epoch 59/200 501/501 [==============================] - 2s 3ms/step - loss: 260252.3281 - mean_absolute_error: 30.3762 - val_loss: 17.9726 - val_mean_absolute_error: 2.5454 Epoch 60/200 501/501 [==============================] - 2s 3ms/step - loss: 260242.0000 - mean_absolute_error: 30.5965 - val_loss: 17.9777 - val_mean_absolute_error: 2.4742 Epoch 61/200 501/501 [==============================] - 2s 3ms/step - loss: 260229.9375 - mean_absolute_error: 30.4068 - val_loss: 17.8292 - val_mean_absolute_error: 2.5200 Epoch 62/200 501/501 [==============================] - 2s 3ms/step - loss: 260231.2500 - mean_absolute_error: 30.5646 - val_loss: 18.3615 - val_mean_absolute_error: 2.5890 Epoch 63/200 501/501 [==============================] - 2s 3ms/step - loss: 260216.0000 - mean_absolute_error: 30.4134 - val_loss: 18.9675 - val_mean_absolute_error: 2.7392 Epoch 64/200 501/501 [==============================] - 2s 3ms/step - loss: 260200.4219 - mean_absolute_error: 30.6109 - val_loss: 18.7744 - val_mean_absolute_error: 2.5831 Epoch 65/200 501/501 [==============================] - 2s 3ms/step - loss: 260205.9375 - mean_absolute_error: 30.4512 - val_loss: 19.2810 - val_mean_absolute_error: 2.7272 Epoch 66/200 501/501 [==============================] - 2s 4ms/step - loss: 260178.7656 - mean_absolute_error: 30.6185 - val_loss: 18.3390 - val_mean_absolute_error: 2.5171 Epoch 67/200 501/501 [==============================] - 2s 3ms/step - loss: 260151.2812 - mean_absolute_error: 30.4394 - val_loss: 18.1316 - val_mean_absolute_error: 2.5722 Epoch 68/200 501/501 [==============================] - 2s 3ms/step - loss: 260169.3594 - mean_absolute_error: 30.5418 - val_loss: 19.0179 - val_mean_absolute_error: 2.6203 Epoch 69/200 501/501 [==============================] - 2s 4ms/step - loss: 260153.2031 - mean_absolute_error: 30.4655 - val_loss: 18.5108 - val_mean_absolute_error: 2.6068 Epoch 70/200 501/501 [==============================] - 2s 4ms/step - loss: 260144.4219 - mean_absolute_error: 30.6337 - val_loss: 17.9826 - val_mean_absolute_error: 2.4907 Epoch 71/200 501/501 [==============================] - 2s 3ms/step - loss: 260130.7812 - mean_absolute_error: 30.4464 - val_loss: 17.9979 - val_mean_absolute_error: 2.5546 Epoch 72/200 501/501 [==============================] - 2s 3ms/step - loss: 260107.6562 - mean_absolute_error: 30.5867 - val_loss: 18.8816 - val_mean_absolute_error: 2.5917 Epoch 73/200 501/501 [==============================] - 2s 3ms/step - loss: 260104.1875 - mean_absolute_error: 30.5349 - val_loss: 18.2207 - val_mean_absolute_error: 2.5414 Epoch 74/200 501/501 [==============================] - 2s 3ms/step - loss: 260116.4375 - mean_absolute_error: 30.4866 - val_loss: 17.7991 - val_mean_absolute_error: 2.5463 Epoch 75/200 501/501 [==============================] - 2s 4ms/step - loss: 260085.5781 - mean_absolute_error: 30.4736 - val_loss: 17.4158 - val_mean_absolute_error: 2.5124 Epoch 76/200 501/501 [==============================] - 2s 4ms/step - loss: 260079.8281 - mean_absolute_error: 30.4645 - val_loss: 17.6827 - val_mean_absolute_error: 2.5294 Epoch 77/200 501/501 [==============================] - 2s 4ms/step - loss: 260096.9844 - mean_absolute_error: 30.5143 - val_loss: 18.3939 - val_mean_absolute_error: 2.5757 Epoch 78/200 501/501 [==============================] - 2s 4ms/step - loss: 260059.7031 - mean_absolute_error: 30.5489 - val_loss: 18.6486 - val_mean_absolute_error: 2.5276 Epoch 79/200 501/501 [==============================] - 2s 3ms/step - loss: 260023.4688 - mean_absolute_error: 30.5485 - val_loss: 19.7555 - val_mean_absolute_error: 2.7394 Epoch 80/200 501/501 [==============================] - 2s 3ms/step - loss: 260055.8281 - mean_absolute_error: 30.6277 - val_loss: 18.5964 - val_mean_absolute_error: 2.5982 Epoch 81/200 501/501 [==============================] - 2s 3ms/step - loss: 259997.1875 - mean_absolute_error: 30.5810 - val_loss: 18.0498 - val_mean_absolute_error: 2.5853 Epoch 82/200 501/501 [==============================] - 2s 4ms/step - loss: 260006.6094 - mean_absolute_error: 30.5169 - val_loss: 17.6961 - val_mean_absolute_error: 2.5462 Epoch 83/200 501/501 [==============================] - 2s 3ms/step - loss: 259984.6094 - mean_absolute_error: 30.5403 - val_loss: 16.9435 - val_mean_absolute_error: 2.4562 Epoch 84/200 501/501 [==============================] - 2s 4ms/step - loss: 259989.3906 - mean_absolute_error: 30.4694 - val_loss: 17.2818 - val_mean_absolute_error: 2.5123 Epoch 85/200 501/501 [==============================] - 2s 3ms/step - loss: 259977.3594 - mean_absolute_error: 30.7104 - val_loss: 20.0342 - val_mean_absolute_error: 2.6798 Epoch 86/200 501/501 [==============================] - 2s 3ms/step - loss: 259958.1406 - mean_absolute_error: 30.5556 - val_loss: 18.6603 - val_mean_absolute_error: 2.6184 Epoch 87/200 501/501 [==============================] - 2s 3ms/step - loss: 259955.8281 - mean_absolute_error: 30.5663 - val_loss: 17.5511 - val_mean_absolute_error: 2.5258 Epoch 88/200 501/501 [==============================] - 2s 3ms/step - loss: 259954.2969 - mean_absolute_error: 30.4349 - val_loss: 19.3164 - val_mean_absolute_error: 2.6124 Epoch 89/200 501/501 [==============================] - 2s 3ms/step - loss: 259964.5000 - mean_absolute_error: 30.6754 - val_loss: 21.9235 - val_mean_absolute_error: 2.7842 Epoch 90/200 501/501 [==============================] - 2s 3ms/step - loss: 259929.1562 - mean_absolute_error: 30.5831 - val_loss: 20.9279 - val_mean_absolute_error: 2.7513 Epoch 91/200 501/501 [==============================] - 2s 3ms/step - loss: 259914.6719 - mean_absolute_error: 30.5151 - val_loss: 21.6727 - val_mean_absolute_error: 2.8882 Epoch 92/200 501/501 [==============================] - 2s 3ms/step - loss: 259877.5781 - mean_absolute_error: 30.7906 - val_loss: 21.7458 - val_mean_absolute_error: 2.7605 Epoch 93/200 501/501 [==============================] - 2s 3ms/step - loss: 259916.3750 - mean_absolute_error: 30.5559 - val_loss: 20.2188 - val_mean_absolute_error: 2.7549 Epoch 94/200 501/501 [==============================] - 2s 4ms/step - loss: 259867.3125 - mean_absolute_error: 30.5706 - val_loss: 18.9062 - val_mean_absolute_error: 2.5939 Epoch 95/200 501/501 [==============================] - 2s 3ms/step - loss: 259869.6094 - mean_absolute_error: 30.5363 - val_loss: 18.3573 - val_mean_absolute_error: 2.5687 Epoch 96/200 501/501 [==============================] - 2s 3ms/step - loss: 259853.1562 - mean_absolute_error: 30.4379 - val_loss: 19.5411 - val_mean_absolute_error: 2.6719 Epoch 97/200 501/501 [==============================] - 2s 3ms/step - loss: 259869.5156 - mean_absolute_error: 31.0359 - val_loss: 21.8523 - val_mean_absolute_error: 2.7039 Epoch 98/200 501/501 [==============================] - 2s 4ms/step - loss: 259830.4062 - mean_absolute_error: 30.6326 - val_loss: 18.5637 - val_mean_absolute_error: 2.5752 Epoch 99/200 501/501 [==============================] - 2s 3ms/step - loss: 259833.2812 - mean_absolute_error: 30.6987 - val_loss: 20.3129 - val_mean_absolute_error: 2.6726 Epoch 100/200 501/501 [==============================] - 2s 4ms/step - loss: 259864.4062 - mean_absolute_error: 30.5071 - val_loss: 20.9459 - val_mean_absolute_error: 2.6710 Epoch 101/200 501/501 [==============================] - 2s 3ms/step - loss: 259850.7812 - mean_absolute_error: 30.7576 - val_loss: 23.7439 - val_mean_absolute_error: 2.8832 Epoch 102/200 501/501 [==============================] - 2s 5ms/step - loss: 259801.3125 - mean_absolute_error: 30.6810 - val_loss: 22.6964 - val_mean_absolute_error: 2.8528 Epoch 103/200 501/501 [==============================] - 2s 4ms/step - loss: 259815.9531 - mean_absolute_error: 30.6733 - val_loss: 21.5569 - val_mean_absolute_error: 2.7685 Epoch 104/200 501/501 [==============================] - 2s 4ms/step - loss: 259771.1875 - mean_absolute_error: 30.5228 - val_loss: 20.9442 - val_mean_absolute_error: 2.7685 Epoch 105/200 501/501 [==============================] - 2s 4ms/step - loss: 259804.8438 - mean_absolute_error: 30.7632 - val_loss: 19.4815 - val_mean_absolute_error: 2.5429 Epoch 106/200 501/501 [==============================] - 2s 4ms/step - loss: 259756.6406 - mean_absolute_error: 30.4881 - val_loss: 20.1929 - val_mean_absolute_error: 2.5904 Epoch 107/200 501/501 [==============================] - 2s 3ms/step - loss: 259798.0625 - mean_absolute_error: 30.6754 - val_loss: 22.1621 - val_mean_absolute_error: 2.7479 Epoch 108/200 501/501 [==============================] - 2s 3ms/step - loss: 259765.7188 - mean_absolute_error: 30.4904 - val_loss: 20.4480 - val_mean_absolute_error: 2.6466 Epoch 109/200 501/501 [==============================] - 2s 3ms/step - loss: 259729.3594 - mean_absolute_error: 30.7016 - val_loss: 19.5665 - val_mean_absolute_error: 2.5376 Epoch 110/200 501/501 [==============================] - 2s 3ms/step - loss: 259728.5469 - mean_absolute_error: 30.3984 - val_loss: 21.0117 - val_mean_absolute_error: 2.5817 Epoch 111/200 501/501 [==============================] - 2s 4ms/step - loss: 259758.5938 - mean_absolute_error: 30.7618 - val_loss: 25.2178 - val_mean_absolute_error: 2.8680 Epoch 112/200 501/501 [==============================] - 2s 4ms/step - loss: 259695.0156 - mean_absolute_error: 30.7486 - val_loss: 24.8810 - val_mean_absolute_error: 2.8505 Epoch 113/200 501/501 [==============================] - 2s 4ms/step - loss: 259688.9688 - mean_absolute_error: 30.6240 - val_loss: 24.4025 - val_mean_absolute_error: 2.8617 Epoch 114/200 501/501 [==============================] - 2s 4ms/step - loss: 259624.7188 - mean_absolute_error: 30.6618 - val_loss: 23.1586 - val_mean_absolute_error: 2.7545 Epoch 115/200 501/501 [==============================] - 2s 3ms/step - loss: 259639.0469 - mean_absolute_error: 30.4829 - val_loss: 20.8940 - val_mean_absolute_error: 2.6464 Epoch 116/200 501/501 [==============================] - 2s 4ms/step - loss: 259632.0312 - mean_absolute_error: 30.5878 - val_loss: 20.2923 - val_mean_absolute_error: 2.5308 Epoch 117/200 501/501 [==============================] - 2s 3ms/step - loss: 259704.8750 - mean_absolute_error: 30.4808 - val_loss: 20.3472 - val_mean_absolute_error: 2.5639 Epoch 118/200 501/501 [==============================] - 2s 4ms/step - loss: 259612.9688 - mean_absolute_error: 30.8324 - val_loss: 24.5936 - val_mean_absolute_error: 2.8241 Epoch 119/200 501/501 [==============================] - 2s 3ms/step - loss: 259640.3750 - mean_absolute_error: 30.6095 - val_loss: 21.4986 - val_mean_absolute_error: 2.7195 Epoch 120/200 501/501 [==============================] - 2s 4ms/step - loss: 259647.1094 - mean_absolute_error: 30.7547 - val_loss: 20.0624 - val_mean_absolute_error: 2.5151 Epoch 121/200 501/501 [==============================] - 2s 3ms/step - loss: 259618.8594 - mean_absolute_error: 30.6892 - val_loss: 20.8285 - val_mean_absolute_error: 2.5938 Epoch 122/200 501/501 [==============================] - 2s 4ms/step - loss: 259615.2656 - mean_absolute_error: 30.5285 - val_loss: 20.1657 - val_mean_absolute_error: 2.5588 Epoch 123/200 501/501 [==============================] - 2s 4ms/step - loss: 259617.1719 - mean_absolute_error: 30.9343 - val_loss: 21.5952 - val_mean_absolute_error: 2.6187 Epoch 124/200 501/501 [==============================] - 2s 4ms/step - loss: 259573.6406 - mean_absolute_error: 30.5889 - val_loss: 20.4744 - val_mean_absolute_error: 2.5195 Epoch 125/200 501/501 [==============================] - 2s 3ms/step - loss: 259539.1250 - mean_absolute_error: 30.5513 - val_loss: 20.2873 - val_mean_absolute_error: 2.5218 Epoch 126/200 501/501 [==============================] - 2s 3ms/step - loss: 259584.4531 - mean_absolute_error: 30.4283 - val_loss: 22.1396 - val_mean_absolute_error: 2.6143 Epoch 127/200 501/501 [==============================] - 2s 3ms/step - loss: 259561.4531 - mean_absolute_error: 30.7230 - val_loss: 20.7738 - val_mean_absolute_error: 2.5848 Epoch 128/200 501/501 [==============================] - 2s 3ms/step - loss: 259558.4375 - mean_absolute_error: 30.4008 - val_loss: 20.1169 - val_mean_absolute_error: 2.5137 Epoch 129/200 501/501 [==============================] - 2s 4ms/step - loss: 259508.8906 - mean_absolute_error: 30.5457 - val_loss: 21.4751 - val_mean_absolute_error: 2.5580 Epoch 130/200 501/501 [==============================] - 2s 3ms/step - loss: 259545.4062 - mean_absolute_error: 30.4502 - val_loss: 22.3325 - val_mean_absolute_error: 2.6661 Epoch 131/200 501/501 [==============================] - 2s 3ms/step - loss: 259558.7344 - mean_absolute_error: 30.6563 - val_loss: 24.4177 - val_mean_absolute_error: 2.6372 Epoch 132/200 501/501 [==============================] - 2s 3ms/step - loss: 259522.8281 - mean_absolute_error: 31.1888 - val_loss: 29.7138 - val_mean_absolute_error: 2.9662 Epoch 133/200 501/501 [==============================] - 2s 3ms/step - loss: 259521.6875 - mean_absolute_error: 30.8536 - val_loss: 26.2694 - val_mean_absolute_error: 2.8562 Epoch 134/200 501/501 [==============================] - 2s 3ms/step - loss: 259508.5625 - mean_absolute_error: 30.7515 - val_loss: 23.8000 - val_mean_absolute_error: 2.6932 Epoch 135/200 501/501 [==============================] - 2s 3ms/step - loss: 259459.2188 - mean_absolute_error: 30.6157 - val_loss: 21.5014 - val_mean_absolute_error: 2.5659 Epoch 136/200 501/501 [==============================] - 2s 3ms/step - loss: 259441.2188 - mean_absolute_error: 30.4868 - val_loss: 23.7934 - val_mean_absolute_error: 2.6463 Epoch 137/200 501/501 [==============================] - 2s 3ms/step - loss: 259413.3594 - mean_absolute_error: 30.5451 - val_loss: 23.9045 - val_mean_absolute_error: 2.7162 Epoch 138/200 501/501 [==============================] - 2s 4ms/step - loss: 259437.3750 - mean_absolute_error: 30.5040 - val_loss: 24.4658 - val_mean_absolute_error: 2.6835 Epoch 139/200 501/501 [==============================] - 2s 4ms/step - loss: 259452.9688 - mean_absolute_error: 30.5354 - val_loss: 23.3943 - val_mean_absolute_error: 2.6421 Epoch 140/200 501/501 [==============================] - 2s 3ms/step - loss: 259454.8438 - mean_absolute_error: 30.4069 - val_loss: 22.8800 - val_mean_absolute_error: 2.6393 Epoch 141/200 501/501 [==============================] - 2s 4ms/step - loss: 259416.1719 - mean_absolute_error: 30.5802 - val_loss: 22.7050 - val_mean_absolute_error: 2.5803 Epoch 142/200 501/501 [==============================] - 2s 3ms/step - loss: 259395.4844 - mean_absolute_error: 30.3961 - val_loss: 22.1188 - val_mean_absolute_error: 2.5822 Epoch 143/200 501/501 [==============================] - 2s 3ms/step - loss: 259347.9531 - mean_absolute_error: 30.5907 - val_loss: 24.1933 - val_mean_absolute_error: 2.6889 Epoch 144/200 501/501 [==============================] - 2s 3ms/step - loss: 259344.9219 - mean_absolute_error: 30.4284 - val_loss: 25.0915 - val_mean_absolute_error: 2.6487 Epoch 145/200 501/501 [==============================] - 2s 3ms/step - loss: 259321.4844 - mean_absolute_error: 30.5033 - val_loss: 26.3385 - val_mean_absolute_error: 2.7767 Epoch 146/200 501/501 [==============================] - 2s 3ms/step - loss: 259358.1250 - mean_absolute_error: 30.5348 - val_loss: 24.6696 - val_mean_absolute_error: 2.6135 Epoch 147/200 501/501 [==============================] - 2s 4ms/step - loss: 259309.6562 - mean_absolute_error: 30.8699 - val_loss: 31.9552 - val_mean_absolute_error: 3.0397 Epoch 148/200 501/501 [==============================] - 2s 3ms/step - loss: 259392.0625 - mean_absolute_error: 30.6235 - val_loss: 30.1417 - val_mean_absolute_error: 2.9569 Epoch 149/200 501/501 [==============================] - 2s 3ms/step - loss: 259282.7969 - mean_absolute_error: 30.9127 - val_loss: 28.7623 - val_mean_absolute_error: 2.9123 Epoch 150/200 501/501 [==============================] - 2s 3ms/step - loss: 259283.9844 - mean_absolute_error: 30.5673 - val_loss: 26.6851 - val_mean_absolute_error: 2.8187 Epoch 151/200 501/501 [==============================] - 2s 3ms/step - loss: 259290.5625 - mean_absolute_error: 30.6770 - val_loss: 24.2699 - val_mean_absolute_error: 2.6346 Epoch 152/200 501/501 [==============================] - 2s 3ms/step - loss: 259347.5469 - mean_absolute_error: 30.4721 - val_loss: 26.7382 - val_mean_absolute_error: 2.6776 Epoch 153/200 501/501 [==============================] - 2s 3ms/step - loss: 259298.9531 - mean_absolute_error: 30.7308 - val_loss: 30.8590 - val_mean_absolute_error: 2.9444 Epoch 154/200 501/501 [==============================] - 2s 3ms/step - loss: 259271.6250 - mean_absolute_error: 30.5378 - val_loss: 29.6793 - val_mean_absolute_error: 2.9056 Epoch 155/200 501/501 [==============================] - 2s 3ms/step - loss: 259244.1094 - mean_absolute_error: 30.6281 - val_loss: 27.9609 - val_mean_absolute_error: 2.7700 Epoch 156/200 501/501 [==============================] - 2s 3ms/step - loss: 259328.1719 - mean_absolute_error: 30.4736 - val_loss: 26.9987 - val_mean_absolute_error: 2.7148 Epoch 157/200 501/501 [==============================] - 2s 4ms/step - loss: 259284.1719 - mean_absolute_error: 30.5951 - val_loss: 27.5849 - val_mean_absolute_error: 2.7902 Epoch 158/200 501/501 [==============================] - 2s 3ms/step - loss: 259200.3438 - mean_absolute_error: 30.4329 - val_loss: 27.1120 - val_mean_absolute_error: 2.7171 Epoch 159/200 501/501 [==============================] - 2s 3ms/step - loss: 259217.9844 - mean_absolute_error: 30.8414 - val_loss: 29.6938 - val_mean_absolute_error: 2.8192 Epoch 160/200 501/501 [==============================] - 2s 3ms/step - loss: 259217.2969 - mean_absolute_error: 30.4625 - val_loss: 24.9298 - val_mean_absolute_error: 2.6503 Epoch 161/200 501/501 [==============================] - 2s 3ms/step - loss: 259178.1250 - mean_absolute_error: 30.6962 - val_loss: 25.4819 - val_mean_absolute_error: 2.6244 Epoch 162/200 501/501 [==============================] - 2s 3ms/step - loss: 259178.0625 - mean_absolute_error: 30.4821 - val_loss: 22.9266 - val_mean_absolute_error: 2.5262 Epoch 163/200 501/501 [==============================] - 2s 3ms/step - loss: 259161.9375 - mean_absolute_error: 30.3549 - val_loss: 24.1002 - val_mean_absolute_error: 2.6027 Epoch 164/200 501/501 [==============================] - 2s 3ms/step - loss: 259120.5625 - mean_absolute_error: 30.4824 - val_loss: 24.6062 - val_mean_absolute_error: 2.6291 Epoch 165/200 501/501 [==============================] - 2s 4ms/step - loss: 259160.1094 - mean_absolute_error: 30.3915 - val_loss: 25.1952 - val_mean_absolute_error: 2.6627 Epoch 166/200 501/501 [==============================] - 2s 4ms/step - loss: 259125.2969 - mean_absolute_error: 31.0181 - val_loss: 32.4063 - val_mean_absolute_error: 3.0310 Epoch 167/200 501/501 [==============================] - 2s 3ms/step - loss: 259197.8438 - mean_absolute_error: 30.6454 - val_loss: 28.8840 - val_mean_absolute_error: 2.8560 Epoch 168/200 501/501 [==============================] - 2s 4ms/step - loss: 259121.3438 - mean_absolute_error: 30.7387 - val_loss: 28.9965 - val_mean_absolute_error: 2.9243 Epoch 169/200 501/501 [==============================] - 2s 4ms/step - loss: 259109.4219 - mean_absolute_error: 30.5971 - val_loss: 26.4365 - val_mean_absolute_error: 2.8056 Epoch 170/200 501/501 [==============================] - 2s 4ms/step - loss: 259044.4375 - mean_absolute_error: 30.5890 - val_loss: 26.3343 - val_mean_absolute_error: 2.7405 Epoch 171/200 501/501 [==============================] - 2s 3ms/step - loss: 259100.3594 - mean_absolute_error: 30.5155 - val_loss: 24.9448 - val_mean_absolute_error: 2.7181 Epoch 172/200 501/501 [==============================] - 2s 3ms/step - loss: 259064.5000 - mean_absolute_error: 30.6616 - val_loss: 25.0858 - val_mean_absolute_error: 2.6478 Epoch 173/200 501/501 [==============================] - 2s 3ms/step - loss: 259062.2031 - mean_absolute_error: 30.4758 - val_loss: 24.5966 - val_mean_absolute_error: 2.6542 Epoch 174/200 501/501 [==============================] - 2s 4ms/step - loss: 259078.3906 - mean_absolute_error: 30.4178 - val_loss: 24.4360 - val_mean_absolute_error: 2.5819 Epoch 175/200 501/501 [==============================] - 2s 4ms/step - loss: 259060.1250 - mean_absolute_error: 30.5107 - val_loss: 28.4264 - val_mean_absolute_error: 2.7729 Epoch 176/200 501/501 [==============================] - 2s 4ms/step - loss: 259035.6250 - mean_absolute_error: 30.4872 - val_loss: 26.7907 - val_mean_absolute_error: 2.7030 Epoch 177/200 501/501 [==============================] - 2s 4ms/step - loss: 258983.5625 - mean_absolute_error: 30.7168 - val_loss: 29.4184 - val_mean_absolute_error: 2.8804 Epoch 178/200 501/501 [==============================] - 2s 3ms/step - loss: 259099.0000 - mean_absolute_error: 30.4981 - val_loss: 30.2047 - val_mean_absolute_error: 2.8527 Epoch 179/200 501/501 [==============================] - 2s 3ms/step - loss: 259014.2969 - mean_absolute_error: 30.6986 - val_loss: 34.4102 - val_mean_absolute_error: 2.9292 Epoch 180/200 501/501 [==============================] - 2s 3ms/step - loss: 259009.9688 - mean_absolute_error: 30.6176 - val_loss: 31.5209 - val_mean_absolute_error: 2.7934 Epoch 181/200 501/501 [==============================] - 2s 3ms/step - loss: 258972.0781 - mean_absolute_error: 30.4706 - val_loss: 28.6259 - val_mean_absolute_error: 2.6865 Epoch 182/200 501/501 [==============================] - 2s 4ms/step - loss: 258985.3594 - mean_absolute_error: 30.5693 - val_loss: 26.9487 - val_mean_absolute_error: 2.6082 Epoch 183/200 501/501 [==============================] - 2s 3ms/step - loss: 258983.4688 - mean_absolute_error: 30.7164 - val_loss: 34.4562 - val_mean_absolute_error: 2.9783 Epoch 184/200 501/501 [==============================] - 2s 4ms/step - loss: 258976.1562 - mean_absolute_error: 30.5661 - val_loss: 32.6643 - val_mean_absolute_error: 2.9231 Epoch 185/200 501/501 [==============================] - 2s 4ms/step - loss: 258928.2188 - mean_absolute_error: 30.5372 - val_loss: 30.1939 - val_mean_absolute_error: 2.8019 Epoch 186/200 501/501 [==============================] - 2s 3ms/step - loss: 258952.5625 - mean_absolute_error: 30.3758 - val_loss: 31.0606 - val_mean_absolute_error: 2.8305 Epoch 187/200 501/501 [==============================] - 2s 3ms/step - loss: 258903.6250 - mean_absolute_error: 30.7325 - val_loss: 34.5546 - val_mean_absolute_error: 2.9765 Epoch 188/200 501/501 [==============================] - 2s 4ms/step - loss: 258958.0469 - mean_absolute_error: 30.8061 - val_loss: 34.9509 - val_mean_absolute_error: 3.0035 Epoch 189/200 501/501 [==============================] - 2s 3ms/step - loss: 258900.6562 - mean_absolute_error: 30.5755 - val_loss: 30.1790 - val_mean_absolute_error: 2.8439 Epoch 190/200 501/501 [==============================] - 2s 3ms/step - loss: 258893.7656 - mean_absolute_error: 30.5135 - val_loss: 31.6960 - val_mean_absolute_error: 2.9175 Epoch 191/200 501/501 [==============================] - 2s 3ms/step - loss: 258925.9688 - mean_absolute_error: 30.3642 - val_loss: 30.1893 - val_mean_absolute_error: 2.8385 Epoch 192/200 501/501 [==============================] - 2s 3ms/step - loss: 258826.3438 - mean_absolute_error: 30.4609 - val_loss: 27.8942 - val_mean_absolute_error: 2.7800 Epoch 193/200 501/501 [==============================] - 2s 3ms/step - loss: 258883.2188 - mean_absolute_error: 30.4673 - val_loss: 29.7278 - val_mean_absolute_error: 2.7349 Epoch 194/200 501/501 [==============================] - 2s 4ms/step - loss: 258848.0781 - mean_absolute_error: 30.4030 - val_loss: 28.3689 - val_mean_absolute_error: 2.7899 Epoch 195/200 501/501 [==============================] - 2s 3ms/step - loss: 258801.3906 - mean_absolute_error: 30.6590 - val_loss: 28.4875 - val_mean_absolute_error: 2.5997 Epoch 196/200 501/501 [==============================] - 2s 3ms/step - loss: 258807.5156 - mean_absolute_error: 30.4316 - val_loss: 27.3241 - val_mean_absolute_error: 2.6365 Epoch 197/200 501/501 [==============================] - 2s 3ms/step - loss: 258821.0469 - mean_absolute_error: 30.4334 - val_loss: 34.4295 - val_mean_absolute_error: 2.9534 Epoch 198/200 501/501 [==============================] - 2s 3ms/step - loss: 258763.4062 - mean_absolute_error: 30.8836 - val_loss: 38.9884 - val_mean_absolute_error: 3.1819 Epoch 199/200 501/501 [==============================] - 2s 3ms/step - loss: 258774.4844 - mean_absolute_error: 30.8386 - val_loss: 29.5166 - val_mean_absolute_error: 2.7986 Epoch 200/200 501/501 [==============================] - 2s 3ms/step - loss: 258843.1406 - mean_absolute_error: 30.4756 - val_loss: 30.0912 - val_mean_absolute_error: 2.8136
from matplotlib import pylab as plt
plt.plot(history.history['loss'],label='train')
plt.plot(history.history['val_loss'],label='test')
plt.legend()
<matplotlib.legend.Legend at 0x1897a192388>
x_test_reshaped=x_test_scaled.reshape((x_test_scaled.shape[0],1,x_test_scaled.shape[1]))
pred = model.predict(x_test_reshaped)
pred.shape
(1464, 3)
test_data['power_pred']=pred
C:\Users\suraj\anaconda3\lib\site-packages\ipykernel_launcher.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
test_data.head()
| power | cloud_cover | apparent_temperature | temperature | humidity | dew_point | wind_bearing | wind_speed | wind_gust | pressure | uv_index | ozone | precip_intensity | precip_probability | precip_type | visibility | icon | summary | power_pred | power_pred1 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| datetime | ||||||||||||||||||||
| 2019-08-01 00:00:00 | 0.0 | 0.99 | 36.36 | 30 | 1 | 25.01 | 121.0 | 3.45 | 6.34 | 999.06 | 0.0 | 280.3 | 0.00 | 0.00 | 0.000000 | 16.09 | 0.095822 | 0.052427 | 0.338723 | 0.186789 |
| 2019-08-01 01:00:00 | 0.0 | 0.99 | 35.66 | 30 | 1 | 24.94 | 126.0 | 3.52 | 6.44 | 998.69 | 0.0 | 280.3 | 0.00 | 0.00 | 0.000000 | 16.09 | 0.095822 | 0.052427 | 0.325856 | 0.178427 |
| 2019-08-01 02:00:00 | 0.0 | 0.99 | 34.88 | 29 | 1 | 25.01 | 128.0 | 3.55 | 6.49 | 998.46 | 0.0 | 280.2 | 0.00 | 0.00 | 0.000000 | 16.09 | 0.095822 | 0.052427 | 0.385216 | 0.193334 |
| 2019-08-01 03:00:00 | 0.0 | 0.98 | 33.43 | 28 | 1 | 25.14 | 124.0 | 3.46 | 6.53 | 998.59 | 0.0 | 280.3 | 0.07 | 0.09 | 0.451355 | 16.09 | 0.095822 | 0.052427 | 0.402874 | 0.190655 |
| 2019-08-01 04:00:00 | 0.0 | 0.95 | 31.50 | 27 | 1 | 25.20 | 114.0 | 3.25 | 6.55 | 998.78 | 0.0 | 280.4 | 0.36 | 0.17 | 0.451355 | 16.09 | 0.095822 | 0.052427 | 0.528821 | 0.294492 |
power_series=test_data.loc[:,('power','power_pred','power_pred1')]
power_series.plot()
<AxesSubplot:xlabel='datetime'>
fig=px.line(test_data,y=['power_pred','power','power_pred1'],title='Energy generation')
fig.update_xaxes(
rangeslider_visible= True,
rangeselector=dict(
buttons = list([
dict(count = 1,label = '1m',step='month',stepmode = "backward"),
dict(count = 2,label = '6m',step='month',stepmode = "backward"),
dict(count = 3,label = '6mYTD',step='month',stepmode = "todate"),
dict(count = 4,label = '2y',step='year',stepmode = "backward"),
dict(count = 5,label = '3y',step='year',stepmode = "backward"),
dict(step= 'all')
])
)
)
fig.show()
fig=px.line(test_data,y=['power','power_pred1'],title='Energy generation')
fig.update_xaxes(
rangeslider_visible= True,
rangeselector=dict(
buttons = list([
dict(count = 1,label = '1m',step='month',stepmode = "backward"),
dict(count = 2,label = '6m',step='month',stepmode = "backward"),
dict(count = 3,label = '6mYTD',step='month',stepmode = "todate"),
dict(count = 4,label = '2y',step='year',stepmode = "backward"),
dict(count = 5,label = '3y',step='year',stepmode = "backward"),
dict(step= 'all')
])
)
)
fig.show()
new_test=df_weather_forecast.loc['2019-10-01':'2019-10-27']
x_test_n, y_test_n=new_test.drop(['power'],axis=1),new_test.power
x_new_test_scaled=scaler.fit_transform(x_test_n)
x_new_test_reshaped=x_new_test_scaled.reshape((x_new_test_scaled.shape[0],1,x_new_test_scaled.shape[1]))
x_test_reshaped.shape
(1464, 1, 17)
preds = model.predict(x_new_test_reshaped)
new_test['power_pred']=preds
C:\Users\suraj\anaconda3\lib\site-packages\ipykernel_launcher.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
power_series=new_test.loc[:,'power_pred']
power_series.plot()
<AxesSubplot:xlabel='datetime_local'>
fig=px.line(new_test,y='power_pred',title='Energy generation')
fig.update_xaxes(
rangeslider_visible= True,
rangeselector=dict(
buttons = list([
dict(count = 1,label = '1m',step='month',stepmode = "backward"),
dict(count = 2,label = '6m',step='month',stepmode = "backward"),
dict(count = 3,label = '6mYTD',step='month',stepmode = "todate"),
dict(count = 4,label = '2y',step='year',stepmode = "backward"),
dict(count = 5,label = '3y',step='year',stepmode = "backward"),
dict(step= 'all')
])
)
)
fig.show()
new_test.to_csv('forecast_October_2019.csv')